Sharedproject.ipynbOpen in CoCalc
Author: CHENG LV
Views : 17
In [21]:
import gzip import csv from collections import defaultdict import datetime as dt #Read in raw data and create variales indicator_a_file = gzip.open('/ext/data/FA2019/Assignment/HKG201501.csv.gz', mode='rt') indicator_a_data = csv.DictReader(indicator_a_file) indicator_a_data.fieldnames #compute trade_count, trade_volume,trade_size and close_price Trade=defaultdict(lambda:defaultdict(lambda:defaultdict(lambda:0))) for transaction in indicator_a_data: security = transaction['#RIC'] date_old= dt.datetime.strptime(transaction['Date-Time'][:10],'%Y-%m-%d') date=str(date_old.year)+'-'+str(date_old.month)+'-'+str(date_old.day) if transaction['Type'] == 'Trade' and transaction['Volume'] and transaction['Price'] and transaction['Qualifiers']!='Nominal Price[USER]': Trade[date][security]['Count'] += 1 Trade[date][security]['Volume'] += int(transaction['Volume']) Trade[date][security]['Size']=Trade[date][security]['Volume']/Trade[date][security]['Count'] Trade[date][security]['Price']=transaction['Price'] #save metrics into csv with open('./HKG_PRICE.csv', mode="w") as file: HKG_PRICE= csv.writer(file) header = ['date','security', 'price'] HKG_PRICE.writerow(header) for date in Trade: for sec in Trade[date]: date = date security = sec price= Trade[date][security]['Price'] HKG_PRICE.writerow([date,security,price]) with open('./Trade_Count_HKG01NEW.csv', mode="w") as file: Trade_Count_HKG01= csv.writer(file) header = ['date','security', 'trade_count','trade_volume','trade_size'] Trade_Count_HKG01.writerow(header) for date in Trade: for sec in Trade[date]: date = date security = sec trade_count= Trade[date][security]['Count'] trade_volume= Trade[date][security]['Volume'] trade_size= Trade[date][security]['Size'] Trade_Count_HKG01.writerow([date,security,trade_count,trade_volume,trade_size])
['#RIC', 'Alias Underlying RIC', 'Domain', 'Date-Time', 'GMT Offset', 'Type', 'Price', 'Volume', 'Bid Price', 'Bid Size', 'Ask Price', 'Ask Size', 'Qualifiers']
In [27]:
Trade=defaultdict(lambda:defaultdict(lambda:defaultdict(lambda:0))) for transaction in indicator_a_data: security = transaction['#RIC'] date_old= dt.datetime.strptime(transaction['Date-Time'][:10],'%Y-%m-%d') date=str(date_old.year)+'-'+str(date_old.month)+'-'+str(date_old.day) if transaction['Type'] == 'Trade' and transaction['Volume'] and transaction['Price'] and transaction['Qualifiers']!='Nominal Price[USER]': Trade[date][security]['Count'] += 1 Trade[date][security]['Volume'] += int(transaction['Volume']) Trade[date][security]['Size']=Trade[date][security]['Volume']/Trade[date][security]['Count'] Trade[date][security]['Price']=transaction['Price'] print(Trade)
WARNING: Some output was deleted.
In [39]:
import csv with open('./Trade_Count_HKG01NEW.csv', mode="w") as file: Trade_Count_HKG01= csv.writer(file) header = ['date','security', 'trade_count','trade_volume','trade_size'] Trade_Count_HKG01.writerow(header) for date in Trade: for sec in Trade[date]: date = date security = sec trade_count= Trade[date][security]['Count'] trade_volume= Trade[date][security]['Volume'] trade_size= Trade[date][security]['Size'] Trade_Count_HKG01.writerow([date,security,trade_count,trade_volume,trade_size])
In [29]:
import csv with open('./HKG_PRICE.csv', mode="w") as file: HKG_PRICE= csv.writer(file) header = ['date','security', 'price'] HKG_PRICE.writerow(header) for date in Trade: for sec in Trade[date]: date = date security = sec price= Trade[date][security]['Price'] HKG_PRICE.writerow([date,security,price])
In [16]:
# trade_volume = defaultdict(lambda:defaultdict(lambda:0)) di_volume = {} cnt = 20000 for trans in indicator_a_data: cnt -= 1 if cnt == 0: break if trans['Type'] == 'Trade' and trans['Volume']:#这句表示成交并且数量不为空 timestamp = dt.datetime.strptime(trans['Date-Time'][:-7], '%Y-%m-%dT%H:%M:%S.%f') ts_date = timestamp.replace(hour = 0, minute = 0, second = 0, microsecond = 0); if trans['#RIC'] in di_volume: if ts_date in di_volume[trans['#RIC']]: di_volume[trans['#RIC']][ts_date] += float(trans['Volume']) else: print('New Stock Date\n', trans) print(ts_date) di_volume[trans['#RIC']][ts_date] = float(trans['Volume']) else: di_volume[trans['#RIC']] = {} di_volume[trans['#RIC']][ts_date] = float(trans['Volume']) print('New stock\n', trans)
In [26]:
VOD_TAS_file = gzip.open('/ext/data/FA2019/Assignment/HKG201409.csv.gz', mode='rt') VOD_TAS_Data = csv.DictReader(VOD_TAS_file) # VOD_Depth_file = gzip.open('./data/VOD_Depth.csv.gz', mode='rt') # VOD_Depth_Data = csv.DictReader(VOD_Depth_file) read_cnt = 100 total_volume = 0 for trans in VOD_TAS_Data: read_cnt -= 1 if read_cnt == 0: break print(trans)
WARNING: Some output was deleted.
In [56]:
Trade_Count=defaultdict(lambda:defaultdict(lambda:0)) Trade_Volume=defaultdict(lambda:defaultdict(lambda:0)) Quote_Count=defaultdict(lambda:defaultdict(lambda:0)) Trade_Size=defaultdict(lambda:defaultdict(lambda:0)) # trade count && quote count && trade volume def compute(file): for transaction in file: security = transaction['#RIC'] date = dt.datetime.strptime(transaction['Date-Time'][:10], '%Y-%m-%d') if transaction['Type'] == 'Trade' and transaction['Volume'] and transaction['Price']: Trade_Count[security][date] += 1 Trade_Volume[security][date] += int(transaction['Volume']) Trade_Size[security][date]=Trade_Volume[security][date]/Trade_Count[security][date] if transaction['Type'] == 'Quote': Quote_Count[security][date] += 1 return (Trade_Size)
In [10]:
import csv from collections import defaultdict HTSC_TAS_File = open('./Lecture_11_Student/data/HTSC_TAS.csv', mode='rt') HTSC_TAS_Data = csv.DictReader(HTSC_TAS_File) Market_Is_Open = False Quote_Count = 0 Trade_Count = 0 for transaction in HTSC_TAS_Data: if transaction['Type'] == 'Mkt. Condition': if 'TRD' in transaction['Qualifiers']: Market_Is_Open = True else: Market_Is_Open = False if Market_Is_Open: if transaction['Type'] == 'Quote': Quote_Count += 1 if transaction['Type'] == 'Trade': Trade_Count += 1 print('Quote-To-Trade Ratio = ', (Quote_Count-Trade_Count)/Trade_Count)
Quote-To-Trade Ratio = 1.3898305084745763
In [30]:
import csv HTSC_TAS_File = open('./Lecture_11_Student/data/HTSC_depth.csv', mode='rt') HTSC_TAS_Data = csv.DictReader(HTSC_TAS_File) Market_Is_Open = False order_Count = 0 Trade_Count = 0 transaction_pre=None for transaction in HTSC_TAS_Data: if transaction['Type'] == 'Mkt. Condition': if 'TRD' in transaction['Qualifiers']: Market_Is_Open = True else: Market_Is_Open = False transaction_pre = None if Market_Is_Open: if transaction['Type'] and transaction_pre: if transaction_pre!=transaction: order_Count += 1 transaction_pre = transaction if transaction['Type'] == 'Trade': Trade_Count += 1 if not transaction_pre: transaction_pre=transaction print(order_Count) #print('order-To-Trade Ratio = ', (order_Count-Trade_Count)/Trade_Count)
0
In [ ]:
import csv HTSC_TAS_File = open('./Lecture_11_Student/data/HTSC_depth.csv', mode='rt') HTSC_TAS_Data = csv.DictReader(HTSC_TAS_File) Market_Is_Open = False order_Count = 0 Trade_Count = 0 transaction_pre=None for transaction in HTSC_TAS_Data: if transaction['Type'] == 'Mkt. Condition': if 'TRD' in transaction['Qualifiers']: Market_Is_Open = True else: Market_Is_Open = False transaction_pre = None if Market_Is_Open: if transaction['Type'] and transaction_pre: if transaction_pre!=transaction: order_Count += 1 transaction_pre = transaction if transaction['Type'] == 'Trade': Trade_Count += 1 if not transaction_pre: transaction_pre=transaction print(order_Count) #print('order-To-Trade Ratio = ', (order_Count-Trade_Count)/Trade_Count)
In [18]:
import csv HTSC_TAS_File = open('./Lecture_11_Student/data/HTSC_depth.csv', mode='rt') HTSC_TAS_Data = csv.DictReader(HTSC_TAS_File) Market_Is_Open = False order_Count = 0 Trade_Count = 0 for transaction in HTSC_TAS_Data: if transaction['Type'] == 'Mkt. Condition': if 'TRD' in transaction['Qualifiers']: Market_Is_Open = True else: Market_Is_Open = False if Market_Is_Open: if transaction['Type']: order_Count += 1 if transaction['Type'] == 'Trade': Trade_Count += 1 print(order_Count)
2602
In [43]:
import pandas as pd import matplotlib.pyplot as plt from scipy import stats from statsmodels.stats.diagnostic import acorr_ljungbox ''' #1 ''' path = "/Users/lvcheng/Desktop/HSI.xlsx" df = pd.read_excel(path) df['Date'] = pd.to_datetime(df['Date'], format=None) df = df.set_index('Date') df_Daily = df.interpolate(method='linear', axis=0) ''' #2 ''' df_Weekly = df_Daily[df_Daily.index.weekday == 4] df_Monthly = df_Daily.resample('BM').last() ''' #3. Use Pandas to calculate the corresponding daily, weekly and monthly returns of the above 3 dataframes ''' df_Daily_return = df_Daily.pct_change() #df_Daily_return = df_Daily_return.dropna() Daily_return_array = df_Daily_return.values df_Weekly_return = df_Weekly.pct_change() #df_Weekly_return = df_Weekly_return.dropna() Weekly_return_array = df_Weekly_return.values df_Monthly_return = df_Monthly.pct_change() #df_Monthly_return = df_Monthly_return.dropna() Monthly_return_array = df_Monthly_return.values ''' #4For the weekly dataframe, calculate the covariance matrix (68 x 68). OUTPUT the file as covHSI.csv ''' df_Weekly_return_cov = df_Weekly_return.cov() df_Weekly_return_cov.to_csv('covHSI.csv')
--------------------------------------------------------------------------- FileNotFoundError Traceback (most recent call last) <ipython-input-43-c110b6299a4e> in <module> 8 ''' 9 path = "/Users/lvcheng/Desktop/HSI.xlsx" ---> 10 df = pd.read_excel(path) 11 df['Date'] = pd.to_datetime(df['Date'], format=None) 12 df = df.set_index('Date') /usr/local/lib/python3.6/dist-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs) 206 else: 207 kwargs[new_arg_name] = new_arg_value --> 208 return func(*args, **kwargs) 209 210 return wrapper /usr/local/lib/python3.6/dist-packages/pandas/io/excel/_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, verbose, parse_dates, date_parser, thousands, comment, skip_footer, skipfooter, convert_float, mangle_dupe_cols, **kwds) 308 309 if not isinstance(io, ExcelFile): --> 310 io = ExcelFile(io, engine=engine) 311 elif engine and engine != io.engine: 312 raise ValueError( /usr/local/lib/python3.6/dist-packages/pandas/io/excel/_base.py in __init__(self, io, engine) 817 self._io = _stringify_path(io) 818 --> 819 self._reader = self._engines[engine](self._io) 820 821 def __fspath__(self): /usr/local/lib/python3.6/dist-packages/pandas/io/excel/_xlrd.py in __init__(self, filepath_or_buffer) 19 err_msg = "Install xlrd >= 1.0.0 for Excel support" 20 import_optional_dependency("xlrd", extra=err_msg) ---> 21 super().__init__(filepath_or_buffer) 22 23 @property /usr/local/lib/python3.6/dist-packages/pandas/io/excel/_base.py in __init__(self, filepath_or_buffer) 357 self.book = self.load_workbook(filepath_or_buffer) 358 elif isinstance(filepath_or_buffer, str): --> 359 self.book = self.load_workbook(filepath_or_buffer) 360 else: 361 raise ValueError( /usr/local/lib/python3.6/dist-packages/pandas/io/excel/_xlrd.py in load_workbook(self, filepath_or_buffer) 34 return open_workbook(file_contents=data) 35 else: ---> 36 return open_workbook(filepath_or_buffer) 37 38 @property /usr/local/lib/python3.6/dist-packages/xlrd/__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows) 109 else: 110 filename = os.path.expanduser(filename) --> 111 with open(filename, "rb") as f: 112 peek = f.read(peeksz) 113 if peek == b"PK\x03\x04": # a ZIP file FileNotFoundError: [Errno 2] No such file or directory: '/Users/lvcheng/Desktop/HSI.xlsx'
In [13]:
#Practice 1: Sample Code #Import packages import gzip import csv import pandas as pd from collections import defaultdict import datetime as dt #Read in raw data and create variales VOD_TAS_file = pd.read_csv('./Lecture_11_Student/data/HTSC_TAS.csv') VOD_TAS_Data = csv.DictReader(VOD_TAS_file) VOD_Depth_file = pd.read_csv('./Lecture_11_Student/data/HTSC_depth.csv') VOD_Depth_Data = csv.DictReader(VOD_Depth_file) #Create variables as dictionaries Trade_Count_For_Timestamp = defaultdict(lambda:defaultdict(lambda:0)) Trade_Price_For_Timestamp = defaultdict(lambda:defaultdict(lambda:defaultdict(lambda:0))) Trade_Volume_For_Timestamp = defaultdict(lambda:defaultdict(lambda:defaultdict(lambda:0))) Market_Open = defaultdict(lambda:False) Prev_L1_Bid_Price = defaultdict(lambda:None) Prev_L1_Ask_Price = defaultdict(lambda:None) Prev_L1_Bid_Depth = defaultdict(lambda:None) Prev_L1_Ask_Depth = defaultdict(lambda:None) #Iterate TAS data to get all the trades and store price and volume in dictionaries indexed by sec, timestamp and count for transaction in VOD_TAS_Data: security = transaction['#RIC'] if transaction['Type'] == 'Mkt. Condition' and 'TRD' in transaction['Qualifiers']: Market_Open[security] = True if transaction['Type'] == 'Trade' and transaction['Volume'] and transaction['Price'] and Market_Open[security]: timestamp = dt.datetime.strptime(transaction['Date-Time'][:-7], '%Y-%m-%dT%H:%M:%S.%f') #Note [-7] rounds the timestmp to milliseconds price = float(transaction['Price']) volume = int(transaction['Volume']) Trade_Price_For_Timestamp[security][timestamp][Trade_Count_For_Timestamp[security][timestamp]] = price Trade_Volume_For_Timestamp[security][timestamp][Trade_Count_For_Timestamp[security][timestamp]] = volume Trade_Count_For_Timestamp[security][timestamp] += 1 #Iterate Depth data to check if a depth update is caused by trade row = 0 for depth_update in VOD_Depth_Data: if row <= 1000: #Let's only test our code with the first 1000 rows from Depth data security = depth_update['#RIC'] timestamp = dt.datetime.strptime(depth_update['Date-Time'][:-7], '%Y-%m-%dT%H:%M:%S.%f') #Only do the comparison between Trade Data and Depth Update when both previous best bid and ask are defined if Prev_L1_Bid_Price[security] and Prev_L1_Ask_Price[security]: if depth_update['L1-BidPrice']: #when best bid is defined in the current depth update current_L1_Bid_Price = float(depth_update['L1-BidPrice']) current_L1_Bid_Depth = int(depth_update['L1-BidSize']) if depth_update['L1-AskPrice']: #when best ask is defined in the current depth update current_L1_Ask_Price = float(depth_update['L1-AskPrice']) current_L1_Ask_Depth = int(depth_update['L1-AskSize']) if Trade_Count_For_Timestamp[security][timestamp]: #if there is trade at the current timestamp for trade_count in range(Trade_Count_For_Timestamp[security][timestamp]): #iterate every trade at the current timetamp #Compare the trade price/volume against the current best bid price and depth change in the depth data if (current_L1_Bid_Price == Trade_Price_For_Timestamp[security][timestamp][trade_count] and \ (Prev_L1_Bid_Depth[security] - current_L1_Bid_Depth) == Trade_Volume_For_Timestamp[security][timestamp][trade_count]): print('Security=',security, 'Timestamp=',timestamp, 'TransactionType=','Trade', 'Trade_Index=',trade_count, 'Trade_Vol=', Trade_Volume_For_Timestamp[security][timestamp][trade_count], 'Side=','Bi', 'BP=',current_L1_Bid_Price, 'BPD=', current_L1_Bid_Depth, 'Pre_BPD=', Prev_L1_Bid_Depth[security]) elif (current_L1_Bid_Price != Trade_Price_For_Timestamp[security][timestamp][trade_count]) and \ (Prev_L1_Bid_Price[security] == Trade_Price_For_Timestamp[security][timestamp][trade_count]) and \ (Prev_L1_Bid_Depth[security] == Trade_Volume_For_Timestamp[security][timestamp][trade_count]): print('Security=',security, 'Timestamp=',timestamp, 'TransactionType=','Trade', 'Trade_Index=',trade_count, 'Trade_Vol=', Trade_Volume_For_Timestamp[security][timestamp][trade_count], 'Side=','Bi', 'BP=',current_L1_Bid_Price, 'BPD=', current_L1_Bid_Depth, 'Pre_BPD=', Prev_L1_Bid_Depth[security]) # Update the Previous Depth statistics if depth_update['L1-BidPrice']: Prev_L1_Bid_Price[security] = float(depth_update['L1-BidPrice']) Prev_L1_Bid_Depth[security] = int(depth_update['L1-BidSize']) if depth_update['L1-AskPrice']: Prev_L1_Ask_Price[security] = float(depth_update['L1-AskPrice']) Prev_L1_Ask_Depth[security] = int(depth_update['L1-AskSize']) row += 1
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-13-8a7a200a8ab5> in <module> 27 for transaction in VOD_TAS_Data: 28 security = transaction['#RIC'] ---> 29 if transaction['Type'] == 'Mkt. Condition' and 'TRD' in transaction['Qualifiers']: 30 Market_Open[security] = True 31 if transaction['Type'] == 'Trade' and transaction['Volume'] and transaction['Price'] and Market_Open[security]: KeyError: 'Type'
In [9]:
#Import packages import gzip import csv import pandas as pd from collections import defaultdict import datetime as dt #Read in raw data and create variales #VOD_TAS_file = gzip.open('./Lecture_11_Student/data/HTSC_TAS.csv', mode='rt') VOD_TAS_Data = pd.read_csv('./Lecture_11_Student/data/HTSC_TAS.csv') #VOD_Depth_file = gzip.open('./Lecture_11_Student/data/HTSC_depth.csv', mode='rt') VOD_Depth_Data = pd.read_csv('./Lecture_11_Student/data/HTSC_depth.csv') VOD_TAS_Data
#RIC Domain Date-Time GMT Offset Type Ex/Cntrb.ID LOC Price Volume Market VWAP ... Original Price Original Volume Original Seq. No. Original Exch Time Trade Price Currency Unique Trade Identification Net Change Original Unique Trade Identification ISIN Unique Quote Identification
0 HTSCq.L Market Price 2019-11-06T17:30:00.281636801Z 0 Quote NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 HTSCq.L Market Price 2019-11-06T17:30:00.281636801Z 0 Mkt. Condition NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 HTSCq.L Market Price 2019-11-07T03:55:00.123212344Z 0 Trade NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 HTSCq.L Market Price 2019-11-07T08:50:00.054196194Z 0 Mkt. Condition NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 HTSCq.L Market Price 2019-11-07T08:50:00.055174436Z 0 Quote NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2860 HTSCq.L Market Price 2019-11-07T17:00:00.068136347Z 0 Quote NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2861 HTSCq.L Market Price 2019-11-07T17:00:01.240362477Z 0 Quote NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2862 HTSCq.L Market Price 2019-11-07T17:10:43.258904702Z 0 Trade SINT NaN 25.1973 100000.0 NaN ... NaN NaN NaN NaN USD 1.015517e+18 NaN NaN NaN NaN
2863 HTSCq.L Market Price 2019-11-07T17:10:43.331008606Z 0 Trade SINT NaN 25.0018 100000.0 NaN ... NaN NaN NaN NaN USD 1.020021e+18 NaN NaN NaN NaN
2864 HTSCq.L Market Price 2019-11-07T17:15:00.081804530Z 0 Mkt. Condition NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

2865 rows × 77 columns

In [ ]:
('./Lecture_11_Student/data/HTSC_depth.csv', mode='rt')