get_fund_info.zip

  • Sy_DLV
    了解作者
  • Python
    开发工具
  • 2KB
    文件大小
  • zip
    文件格式
  • 0
    收藏次数
  • 1 积分
    下载积分
  • 0
    下载次数
  • 2020-01-02 14:37
    上传日期
基于wind api获取mutual fund相关数据
get_fund_info.zip
  • get_fund_info.py
    7.1KB
内容介绍
"""Build month-end sector membership tables for bond mutual funds via the Wind API.

For every index in ``index_dict`` the script reads the membership intervals
(``begin_date`` / ``end_date`` per fund) from
``fund_info/sec_info_in&out(<name>).csv``, expands each interval into one row
per trade date taken from ``trade_date.csv``, and writes the result to
``fund_info/sector_info(<name>).csv``.

The commented-out sections are the earlier, one-off pipeline steps that
download the raw data from Wind and produce the interval files consumed here.
"""
import pandas as pd
import os
from datetime import datetime
from util.func import date_formatting, month_end
import warnings
# Kept before the WindPy import, as in the original script, so that warnings
# emitted while WindPy loads are silenced as well.
warnings.filterwarnings('ignore')
from WindPy import w

w.start()

# Define project path and column names.
# [:-1] drops the last path component, so ``path`` is the parent directory of
# the current working directory (without the slice it would be the cwd itself).
path = os.sep.join(os.path.realpath('.').split(os.sep)[:-1]) + os.sep
col_fund, col_bond, col_index, col_manager = 'fund_id', 'bond_id', 'index_id', 'manager_id'
col_date, col_effect, col_begin, col_end = 'date', 'effect_date', 'begin_date', 'end_date'
print('path of project:', path)

# Data directories located directly under the project path.
path_list = ['fund_hold', 'fund_nav', 'fund_info', 'util']
path_hold, path_nav, path_info, path_util = [''.join([path, x, os.sep]) for x in path_list]
# Directories used only by this script.
path_raw = path_info + 'raw' + os.sep
path_backup = path_info + 'backup' + os.sep


def get_fund_type(ts_type):
    """Convert one fund's entry/exit records into begin/end intervals.

    Args:
        ts_type: DataFrame holding the records of a single fund. It must
            contain the ``col_date`` column and a ``'tradestatus'`` column.

    Returns:
        DataFrame with ``col_date`` renamed to ``col_begin`` plus a
        ``col_end`` column holding the date of the chronologically next
        record (missing for the last record). Only rows whose
        ``'tradestatus'`` equals ``'纳入'`` are kept.
    """
    # Sort first and only then shift: the "next record" must be the next one
    # in date order, not the next one in whatever order the rows arrived.
    ordered = ts_type.sort_values(by=col_date).reset_index(drop=True)
    ts_begin = ordered.rename(columns={col_date: col_begin})
    # Date of the following record, aligned to the current row.
    ts_end = ordered[[col_date]].shift(-1).rename(columns={col_date: col_end})
    ts_data = pd.merge(ts_begin, ts_end, left_index=True, right_index=True, how='outer')
    ts_data = ts_data[ts_data['tradestatus'] == '纳入']
    return ts_data


def _membership_dates(all_dates, begin, end, default_begin, default_end):
    """Return the dates of *all_dates* lying inside the closed range [begin, end].

    A missing bound (None / NaN / NaT) is replaced by the matching default,
    so open-ended intervals are clamped to the overall sample window.
    """
    lower = default_begin if pd.isna(begin) else begin
    upper = default_end if pd.isna(end) else end
    return [x for x in all_dates if (x >= lower) and (x <= upper)]


# Wind sector codes.
sector_dict = {'中长期纯债型基金': '2001010301000000',
               '混合债券型一级基金': '2001010303000000',
               '混合债券型二级基金': '2001010304000000'}
# Wind index codes; the long-term pure bond index ('885008.WI') is not used.
index_dict = {'混合债券型一级基金': '885006.WI', '混合债券型二级基金': '885007.WI'}
begin_date = '2007-01-01'
end_date = datetime.today().strftime('%Y-%m-%d')
# end_date = '2013-01-01'

# Trade dates inside [begin_date, end_date], in ascending order.
trade_date = pd.read_csv(path + 'trade_date.csv', index_col=None, encoding='gbk')
trade_date = date_formatting(trade_date, 'trade_date', 'month_end')
trade_date = trade_date[(trade_date['trade_date'] >= begin_date) & (trade_date['trade_date'] <= end_date)]
date_list = trade_date['trade_date'].to_list()
date_list.sort()

# # Fetch the initial constituents of every sector
# for key, value in sector_dict.items():
#     print(key)
#     data = w.wset("sectorconstituent","date=2007-01-01;sectorid=%s"%value)
#     if data.ErrorCode:
#         raise Exception('data error when get new data: %d' % data.ErrorCode)
#     data_sec = pd.DataFrame(data.Data, index = data.Fields).T.rename(columns = {'date':col_date})
#     data_sec = date_formatting(data_sec, col_date)
#     # print(data_sec.head())
#     data_sec.to_csv(path_info + 'sec_info_(%s).csv'%key, index = False, encoding = 'gbk')

# # Index entry/exit history; only available for the two indices in index_dict
# for key, value in index_dict.items():
#     print(key)
#     data_new = w.wset("indexhistory","startdate=%s;enddate=%s;windcode=%s;\
#                       field=tradedate,tradecode,tradename,tradestatus"%(begin_date, end_date, value))
#     if data_new.ErrorCode:
#         raise Exception('data error when get new data: %d' % data_new.ErrorCode)
#
#     data_new = pd.DataFrame(data_new.Data, index = data_new.Fields).T\
#         .rename(columns = {'tradedate':col_date, 'tradecode':col_fund})
#     data_new = date_formatting(data_new, col_date)
#     print(data_new.head())
#     data_new.to_csv(path_info + 'sector_in&out(%s).csv'%key, index = False, encoding = 'gbk')

# Convert the entry/exit history into begin_date / end_date intervals
# for key, value in index_dict.items():
#     fund_data = pd.read_csv(path_info + 'sector_in&out(%s).csv'%key, index_col = None, encoding = 'gbk')
#     fund_type = []
#     for fund in set(fund_data[col_fund]):
#         temp = fund_data[fund_data[col_fund] == fund]
#         fund_type.append(get_fund_type(temp))
#     fund_type = pd.concat(fund_type)
#     fund_type = fund_type.drop('tradestatus', axis = 1)
#
#     fund_type.to_csv(path_info + 'sec_info_in&out(%s).csv'%key, index = False, encoding = 'gbk')

# Expand every membership interval into one row per trade date, i.e. insert a
# 'date' column holding each trade date between begin_date and end_date.
# print(datetime.now(), '')
for key in index_dict:
    print(key)
    print(type(end_date))
    fund_type = pd.read_csv(path_info + 'sec_info_in&out(%s).csv' % key, index_col=None, encoding='gbk')
    fund_type = date_formatting(fund_type, col_begin, col_end)

    # Collect the per-interval frames and concatenate once at the end;
    # DataFrame.append was removed in pandas 2.0 and is quadratic in a loop.
    expanded = []
    for _, record in fund_type.iterrows():
        data = record.copy()
        data['index_col'] = 1  # constant key used to cross-join with the dates
        print(data[col_begin])
        print(data[col_end])

        # Always filter the full ``date_list``. Reassigning ``date_list`` here
        # (as the script used to) shrinks it for every later fund and index.
        member_dates = _membership_dates(date_list, data[col_begin], data[col_end],
                                         begin_date, end_date)
        a = pd.DataFrame(member_dates, columns=[col_date])
        if a.empty:
            # No trade date falls into this interval; show the bound types to
            # help diagnose date-format mismatches, then skip the record.
            # print(data)
            print(type(data[col_begin]))
            print(type(data[col_end]))
            continue
        a['index_col'] = 1
        expanded.append(pd.merge(pd.DataFrame(data).T, a, on='index_col', how='outer'))

    if not expanded:
        # Nothing to write for this index; avoid failing on an empty frame.
        print('no membership rows for %s, nothing saved' % key)
        continue

    fund_info = pd.concat(expanded)
    print(fund_info.shape)
    print(fund_info.head())

    # Save the expanded table.
    fund_info = fund_info.drop('index_col', axis=1)
    fund_info = fund_info.sort_values(by=[col_date, col_fund])
    fund_info.to_csv(path_info + 'sector_info(%s).csv' % key, index=False, encoding='gbk')

# # Update the constituents of the medium/long-term pure bond fund sector
# data_old = pd.read_csv(path_info + 'sec_info_(中长期纯债型基金).csv', index_col = None, encoding='gbk')
# data_old = date_formatting(data_old, col_date)
# begin_date_old = data_old[col_date].max()
# date_list = [x for x in date_list if x >= begin_date_old]
# date_list.sort()
#
# # This sector has no matching index, so constituents must be fetched one trade date at a time
# data_add = pd.DataFrame()
# for date in date_list:
#     print(date)
#     data_new = w.wset("sectorconstituent","date=%s;sectorid=2001010301000000;field=wind_code,sec_name"%date)
#     if data_new.ErrorCode:
#         data_add.to_csv(path_info + 'sec_info_(中长期纯债型基金).csv', index = False, encoding = 'gbk')
#         raise Exception('data error when get new data: %d' % data_new.ErrorCode)
#     data_new = pd.DataFrame(data_new.Data, index = data_new.Fields).T.rename(columns = {'wind_code':col_fund})
#     data_new[col_date]=date
#     data_add = data_add.append(data_new)
#
# # print(data_add)
# data_add = date_formatting(data_add, col_date)
# data_add.to_csv(path_backup + 'sec_info(中长期纯债型基金)_%s-%s.csv'%(begin_date_old, end_date), index = False, encoding='gbk')
#
# data_all = data_old[data_old[col_date] < begin_date_old].append(data_add)
# data_all.to_csv(path_info + 'sector_info(中长期纯债型基金).csv', index = False, encoding = 'gbk')
评论
    相关推荐