认识一下Qlib的158因子特征
from qlib.data import D
from qlib.data.dataset import DatasetH
from qlib.data.dataset.handler import DataHandlerLP
from qlib.data.dataset.loader import QlibDataLoader
from qlib.data.dataset.processor import DropnaLabel, Fillna, ZScoreNorm# ================== 第一部分:数据准备 ==================
instruments = ['sh000300']  # CSI 300 index
base_fields = ['$open', '$high', '$low', '$close', '$volume']  # raw OHLCV fields

# ================== Part 2: feature engineering ==================
# Author's note: Qlib expressions are written as double-quoted strings and must
# not contain an "as" alias, which is why the column names live in a separate
# list (feature_names) that must stay in the same order as technical_fields.
technical_fields = [
    # Price-range features
    "($close - $open) / $open",  # KWID
    "($high - $low) / $open",  # KLEN
    "($high - $close) / $close",  # KWID2
    # Momentum features
    "($close - Ref($close, 1)) / Ref($close, 1)",  # KUP
    "($high - Ref($close, 1)) / Ref($close, 1)",  # KUP2
    "($low - Ref($close, 1)) / Ref($close, 1)",  # KLOW
    "($low - Ref($low, 1)) / Ref($low, 1)",  # KLOW2
    # Candle-shape features
    "($close - $open) / ($high - $low + 1e-5)",  # KSFT
    "Ref($close, 1) / ($open + 1e-5)",  # KSFT2
    # Opening-gap feature
    "$open / (Ref($close, 1) + 1e-5)",  # OPENG
    # Volume features
    "Mean($volume, 10) / ($volume + 1e-5)",  # VSUWN10
    "Mean($volume, 20) / ($volume + 1e-5)",  # VSUWN20
    "Mean($volume, 30) / ($volume + 1e-5)",  # VSUWN30
    "Mean($volume, 60) / ($volume + 1e-5)",  # VSUWN60
    "($volume - Ref($volume, 5)) / (Ref($volume, 5) + 1e-5)",  # VSUWD5
    # Other features
    "($volume - Ref($volume, 10)) / (Ref($volume, 10) + 1e-5)",  # VSUWD10
    "($volume - Ref($volume, 20)) / (Ref($volume, 20) + 1e-5)",  # VSUWD20
    "($volume - Ref($volume, 30)) / (Ref($volume, 30) + 1e-5)",  # VSUWD30
    "($volume - Ref($volume, 60)) / (Ref($volume, 60) + 1e-5)",  # VSUWD60
    # Label (must remain the LAST entry; the loader config slices on that).
    # NOTE(review): in Qlib a positive Ref offset is understood to look at
    # past bars, so this compares the close 5 bars ago with today's close.
    # Confirm whether a forward return (e.g. Ref($close, -5)) was intended.
    "Ref($close, 5) / ($close + 1e-5) - 1",  # LABEL0
]

# Column names, one per expression above, in the same order.
feature_names = [
    "KWID", "KLEN", "KWID2",
    "KUP", "KUP2", "KLOW", "KLOW2",
    "KSFT", "KSFT2", "OPENG",
    "VSUWN10", "VSUWN20", "VSUWN30", "VSUWN60", "VSUWD5",
    "VSUWD10", "VSUWD20", "VSUWD30", "VSUWD60",
    "LABEL0",
]

# ================== Part 3: data loader configuration ==================
data_loader_config = {
    "feature": technical_fields[:-1],  # every expression except the label
    "label": [technical_fields[-1]],  # the last expression is the label
}

# ================== Part 4: create the data handler ==================
# Handler covering 2010-2020. No processors run for inference; the learning
# pipeline drops rows without a label, fills missing values, then applies
# z-score normalisation fitted on the 2010-2015 window.
handler = DataHandlerLP(
    instruments=instruments,
    start_time="2010-01-01",
    end_time="2020-12-31",
    data_loader=QlibDataLoader(config=data_loader_config),
    infer_processors=[],
    learn_processors=[
        {"class": DropnaLabel, "kwargs": dict()},
        {"class": Fillna, "kwargs": dict()},
        {
            "class": ZScoreNorm,
            "kwargs": dict(
                fit_start_time="2010-01-01",
                fit_end_time="2015-12-31",
            ),
        },
    ],
)

# ================== Part 5: dataset split ==================
# (start, end) date pairs for each named segment.
segments = dict(
    train=("2010-01-01", "2015-12-31"),
    valid=("2016-01-01", "2017-12-31"),
    test=("2018-01-01", "2020-12-31"),
)

# ================== Part 6: create the dataset ==================
# NOTE(review): no qlib.init(...) call is visible before this point; Qlib is
# presumably initialised elsewhere (e.g. an earlier notebook cell) - confirm.
dataset = DatasetH(handler=handler, segments=segments)

# Fetch the "train" segment.
# NOTE(review): prepare() is understood to default to the inference data key,
# in which case the handler's learn_processors are not applied to this frame.
# Confirm whether data_key=DataHandlerLP.DK_L was intended.
train_df = dataset.prepare("train")

# The expressions carry no "as" aliases, so assign readable column names.
# pandas raises if feature_names and the column count differ in length.
train_df.columns = feature_names

print("训练集数据样例:")
print(train_df.head())
训练集数据样例:KWID KLEN KWID2 KUP KUP2 \ datetime instrument 2010-01-04 sh000300 -0.015933 0.017403 0.017685 -0.011313 0.006172 2010-01-05 sh000300 0.005317 0.022529 0.003785 0.008149 0.011965 2010-01-06 sh000300 -0.004769 0.013393 0.013299 -0.006260 0.006956 2010-01-07 sh000300 -0.020236 0.029858 0.025090 -0.019841 0.004752 2010-01-08 sh000300 0.006717 0.016020 0.000560 0.002498 0.003059 KLOW KLOW2 KSFT KSFT2 OPENG \ datetime instrument 2010-01-04 sh000300 -0.011313 -0.002018 -0.915402 0.995324 1.004693 2010-01-05 sh000300 -0.010627 -0.010627 0.235979 0.997188 1.002815 2010-01-06 sh000300 -0.006417 0.012440 -0.355992 1.001498 0.998499 2010-01-07 sh000300 -0.025118 -0.024963 -0.677694 0.999594 1.000401 2010-01-08 sh000300 -0.012894 -0.007550 0.419210 1.004206 0.995806 VSUWN10 VSUWN20 VSUWN30 VSUWN60 VSUWD5 \ datetime instrument 2010-01-04 sh000300 0.902123 0.976117 1.196088 1.193749 0.408967 2010-01-05 sh000300 0.738603 0.748769 0.918195 0.924786 0.549599 2010-01-06 sh000300 0.838915 0.815925 0.974689 1.017141 0.413020 2010-01-07 sh000300 0.859500 0.804227 0.937711 0.993251 -0.091365 2010-01-08 sh000300 1.130150 1.065647 1.202639 1.312900 -0.122060 VSUWD10 VSUWD20 VSUWD30 VSUWD60 LABEL0 datetime instrument 2010-01-04 sh000300 -0.000150 -0.507190 -0.302369 0.162298 -0.031245 2010-01-05 sh000300 0.775501 -0.059368 -0.087044 0.455339 -0.024023 2010-01-06 sh000300 0.454750 -0.053867 -0.468220 0.547375 -0.011576 2010-01-07 sh000300 0.671713 0.172625 -0.298880 -0.007756 0.025174 2010-01-08 sh000300 -0.055727 0.056051 -0.524639 0.003769 0.027453
初学 Qlib:查看 Alpha158 和 Alpha360 的多因子信息。
# Disabled Alpha158 example. It is kept inside a bare triple-quoted string so
# that it does not run (it defines its own main(), which would otherwise be
# replaced by the Alpha360 main() defined after it). Only the line structure
# of the snippet has been restored; its text is otherwise unchanged.
"""
from qlib.contrib.data.handler import Alpha158
import qlib

#查看Qlib官方文档
#检查qlib/contrib/data/handler.py源文件中的Alpha158类定义


def main():
    qlib.init(provider_uri='D:/qlib/qlib_data/cn_data')
    handler = Alpha158(instruments='csi300', start_time='2010-01-01', end_time='2020-12-31')
    # 使用 get_cols() 获取因子名称
    feature_names = handler.get_cols()
    print(feature_names)
    print(len(feature_names)) # 应该输出158


if __name__ == '__main__':
    main()
"""
from qlib.contrib.data.handler import Alpha360
import qlib


def main():
    """Print the Alpha360 factor names, their count, and a sample of the data.

    Initialises Qlib against the local CN data directory, builds an Alpha360
    handler for the csi300 universe over 2010-2020 with no processors, and
    prints the feature column names, the number of features, and the first
    rows returned by the handler.
    """
    # Initialise Qlib before constructing any handler.
    qlib.init(provider_uri='D:/qlib/qlib_data/cn_data')

    # Time range to load.
    start_time = '2010-01-01'
    end_time = '2020-12-31'

    # Author's note: fit_start_time / fit_end_time are not set explicitly;
    # both processor lists are passed as empty.
    handler = Alpha360(
        instruments='csi300',
        start_time=start_time,
        end_time=end_time,
        infer_processors=[],
        learn_processors=[],
    )

    # Ask for the feature group only: the default column set covers every
    # column (the label included), so the count would not come out as 360.
    feature_names = handler.get_cols(col_set="feature")

    print("Alpha360因子列表:")
    print(feature_names)
    print(f"因子总数: {len(feature_names)}")  # expected: 360

    # Show a few rows of the actual data.
    df = handler.fetch()
    print("\n数据示例:")
    print(df.head())


if __name__ == '__main__':
    main()