java代码整合kettle9.3实现读取表中的数据,生成excel文件
java代码整合kettle9.3实现读取表中的数据,生成excel文件
1.简介
本次使用java代码整合kettle9.3版本,数据库使用mysql。
2.jar包导入
项目需要依赖部分kettle中的jar包,请将这部分jar包自行导入maven仓库。
<dependency><groupId>com.kettle</groupId><artifactId>kettle-engine</artifactId><version>9.3</version></dependency><dependency><groupId>com.kettle</groupId><artifactId>metastore</artifactId><version>9.3</version></dependency><dependency><groupId>com.kettle</groupId><artifactId>kettle-core</artifactId><version>9.3</version></dependency><dependency><groupId>com.kettle</groupId><artifactId>pentaho-encryption-support</artifactId><version>9.3</version></dependency><dependency><groupId>com.kettle</groupId><artifactId>monetdb-jdbc</artifactId><version>2.8</version></dependency><dependency><groupId>org.apache.commons</groupId><artifactId>commons-vfs2</artifactId><version>2.6.0</version></dependency><dependency><groupId>com.kettle</groupId><artifactId>jxl</artifactId><version>2.6</version></dependency><dependency><groupId>com.kettle</groupId><artifactId>commons-dbcp</artifactId><version>1.4</version></dependency><dependency><groupId>com.kettle</groupId><artifactId>commons-pool</artifactId><version>1.5</version></dependency><dependency><groupId>mysql</groupId><artifactId>mysql-connector-java</artifactId><version>5.1.47</version><scope>runtime</scope></dependency>
可以通过maven命令的形式导入:
mvn install:install-file -Dfile=jar包地址 -DgroupId=jar包分组id -DartifactId=jar包名称id -Dversion=jar包版本 -Dpackaging=jar
如:
mvn install:install-file -Dfile=D:\worktool\kettle\pdi-ce-7.1.0.0-12\data-integration\lib\kettle-core-9.3.0.0-12.jar -DgroupId=com.kettle -DartifactId=kettle-core -Dversion=9.3 -Dpackaging=jar
3.项目所需xml配置
项目整合kettle启动时会爆需要kettle-password-encoder-plugins.xml文件。
kettle-password-encoder-plugins.xml
<password-encoder-plugins><password-encoder-plugin id="Kettle"><description>Kettle Password Encoder</description><classname>org.pentaho.di.core.encryption.KettleTwoWayPasswordEncoder</classname></password-encoder-plugin>
</password-encoder-plugins>
mysql的xml配置文件
用的时候需要改成自己的数据库配置,name要和代码中的kettle数据库保持一致。
<?xml version="1.0" encoding="UTF-8"?>
<connection><!--数据库名称--><name>kettle</name><!--连接地址--><server>127.0.0.1</server><!--数据库类型--><type>Mysql</type><access>Native</access><!--连接的库的名称--><database>aweb</database><!--端口--><port>3306</port><!--账号--><username>root</username><!--密码--><password>root</password><attributes><attribute><code>USE_POOLING</code><attribute>Y</attribute></attribute><attribute><code>EXTRA_OPTION_MYSQL.characterEncoding</code><attribute>utf8</attribute></attribute><attribute><code>EXTRA_OPTION_MYSQL.defaultFetchSize</code><attribute>500</attribute></attribute><attribute><code>EXTRA_OPTION_MYSQL.serverTimeZone</code><attribute>UTC</attribute></attribute><attribute><code>serverTimeZone</code><attribute>UTC</attribute></attribute><attribute><code>serverTimezone</code><attribute>UTC</attribute></attribute></attributes>
</connection>
4.java代码
package com.kettle.utils;import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.plugins.PluginRegistry;import javax.servlet.http.HttpServletRequest;/*** kettle配置*/
public class KettleConfig {/*** 运行环境初始化** @param request HttpServletRequest* @throws KettleException 异常*/public static void initKettleEnvironment(HttpServletRequest request) throws KettleException {if (KettleEnvironment.isInitialized()) {return;}if (request == null) {KettleEnvironment.init();}}/*** 插件注册,用于注册转换中需要用到的插件** @return PluginRegistry*/public static PluginRegistry getRegistry() {return PluginRegistry.getInstance();}}
package com.kettle.utils;import org.pentaho.di.core.database.Database;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleXMLException;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransHopMeta;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepMeta;/*** kettle转换操作*/
public class KettleTransformation {/*** 构建TransMeta转换** @param metaName 名称* @param transXML 数据库配置xml* @return TransMeta* @throws KettleXMLException 异常*/public static TransMeta buildTransMeta(String metaName, String... transXML) throws KettleXMLException {TransMeta transMeta = new TransMeta();// 设置转化元的名称transMeta.setName(metaName);// 添加转换的数据库连接for (int i = 0; i < transXML.length; i++) {transMeta.addDatabase(new DatabaseMeta(transXML[i]));}return transMeta;}/*** 用于将表输入步骤与第二步骤绑定** @param transMeta TransMeta* @param from 第一个步骤* @param to 第二个步骤*/public static void addTransHop(TransMeta transMeta, StepMeta from, StepMeta to) {transMeta.addTransHop(new TransHopMeta(from, to));}/*** 执行抽取** @param transMeta TransMeta* @param targetDbName 数据库名*/public static void executeTrans(TransMeta transMeta, String targetDbName) {try {Database database = new Database(null, transMeta.findDatabase(targetDbName));database.connect();Trans trans = new Trans(transMeta);trans.execute(new String[]{"com.kettle.start..."});trans.waitUntilFinished();// 关闭数据库连接database.disconnect();if (trans.getErrors() > 0) {System.out.println(trans.getErrors());}} catch (KettleException e) {e.printStackTrace();}}}
package com.kettle.utils;import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.plugins.PluginRegistry;
import org.pentaho.di.core.plugins.StepPluginType;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.steps.exceloutput.ExcelField;
import org.pentaho.di.trans.steps.exceloutput.ExcelOutputMeta;
import org.pentaho.di.trans.steps.tableinput.TableInputMeta;/*** kettle步骤*/
public class KettleStep {/*** 设置表输入步骤** @param transMeta TransMeta* @param registry PluginRegistry* @param sourceDbName 数据库名* @param sql sql* @param stepName 步骤名* @return StepMeta*/public static StepMeta setTableInputStep(TransMeta transMeta, PluginRegistry registry, String sourceDbName, String sql,String stepName) {// 创建表输入TableInputMeta tableInputMeta = new TableInputMeta();String pluginId = registry.getPluginId(StepPluginType.class, tableInputMeta);// 指定数据源数据库配置名DatabaseMeta source = transMeta.findDatabase(sourceDbName);tableInputMeta.setDatabaseMeta(source);tableInputMeta.setSQL(sql);// 将表输入添加到转换中StepMeta stepMeta = new StepMeta(pluginId, stepName, tableInputMeta);// 将表输入添加到步骤中transMeta.addStep(stepMeta);return stepMeta;}/*** 设置Excel输出步骤** @param transMeta TransMeta* @param registry PluginRegistry* @param stepName 步骤名* @param fileName 文件名* @return StepMeta*/public static StepMeta setExcelOutput(TransMeta transMeta, PluginRegistry registry, String stepName, String fileName) {// 创建表输出ExcelOutputMeta excelOutputMeta = new ExcelOutputMeta();String pluginId = registry.getPluginId(StepPluginType.class, excelOutputMeta);excelOutputMeta.setHeaderEnabled(true);excelOutputMeta.setFooterEnabled(false);excelOutputMeta.setAppend(false);excelOutputMeta.setFileName(fileName);excelOutputMeta.setDoNotOpenNewFileInit(false);excelOutputMeta.setCreateParentFolder(false);excelOutputMeta.setOutputFields(new ExcelField[]{});// 将表输出添加到转换中StepMeta stepMeta = new StepMeta(pluginId, stepName, excelOutputMeta);// 添加到步骤中transMeta.addStep(stepMeta);return stepMeta;}}
package com.kettle.utils;import com.kettle.common.enums.Kettle;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.plugins.PluginRegistry;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepMeta;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;@Component
public class KettleClient {private String databaseName = "kettle";/*** 读取文件内容** @param path 文件地址* @return 文件中的内容*/public String getDatabase(String path) {try {Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new File(path));Transformer t = TransformerFactory.newInstance().newTransformer();ByteArrayOutputStream bos = new ByteArrayOutputStream();t.transform(new DOMSource(document), new StreamResult(bos));return bos.toString();} catch (SAXException | IOException | TransformerException | ParserConfigurationException e) {e.printStackTrace();}return "";}public void getExcel(String sql) {try {KettleConfig.initKettleEnvironment(null);String fileName = "D:/workProject/kettle/src/main/resources/excel/text.xls";String transXML = getDatabase("src/main/resources/jdbc/mysql.xml");TransMeta meta = KettleTransformation.buildTransMeta(databaseName, transXML);PluginRegistry registry = KettleConfig.getRegistry();StepMeta step1 = KettleStep.setTableInputStep(meta, registry, databaseName, sql, "table input");StepMeta step2 = KettleStep.setExcelOutput(meta, registry, "excel out", fileName);KettleTransformation.addTransHop(meta, step1, step2);KettleTransformation.executeTrans(meta, databaseName);} catch (KettleException e) {e.printStackTrace();}}}
注意kettle数据库的名称要和xml中配置的一样。
整理借鉴了很多大佬写的,在此无法一一说明,这只是个人用来查漏补缺的文章,如果对你有帮助我很高兴。