当前位置: 首页 > news >正文

Spark 之 like 表达式

Spark 优化器中的 LikeSimplification 规则会对 LIKE 表达式做优化:当模式不需要完整的正则匹配时,会把它改写成更简单的字符串谓词(StartsWith、EndsWith、Contains、EqualTo)。

/**
 * Simplifies LIKE expressions that do not need full regular expressions to evaluate the
 * condition. For example, when the expression is just checking to see if a string starts
 * with a given pattern.
 *
 * Single `Like` predicates with a literal pattern are rewritten to `StartsWith`, `EndsWith`,
 * `Contains` or `EqualTo` where the pattern shape allows it. The multi-pattern variants
 * (`LikeAll`, `NotLikeAll`, `LikeAny`, `NotLikeAny`) are split into the simplifiable patterns,
 * which become plain predicates, and the remaining patterns, which stay in a copy of the
 * original multi-like expression.
 */
object LikeSimplification extends Rule[LogicalPlan] with PredicateHelper {
  // Each regex must match the WHOLE pattern string (Scala regex extractors are anchored), and
  // the captured literal parts must not contain the wildcards `_` or `%`.
  // Patterns containing the escape character are rejected up front in `simplifyLike`, so cases
  // like "something\%" are not optimized, but this does not affect correctness.
  private val startsWith = "([^_%]+)%".r
  private val endsWith = "%([^_%]+)".r
  private val startsAndEndsWith = "([^_%]+)%([^_%]+)".r
  private val contains = "%([^_%]+)%".r
  private val equalTo = "([^_%]*)".r

  /**
   * Tries to replace `input LIKE pattern` with a cheaper string predicate.
   *
   * @param input      the expression on the left-hand side of LIKE
   * @param pattern    the LIKE pattern text
   * @param escapeChar the escape character of the LIKE expression (backslash by default)
   * @return `Some(replacement)` when the pattern has one of the recognised shapes,
   *         `None` when the LIKE must be kept as is
   */
  private def simplifyLike(
      input: Expression,
      pattern: String,
      escapeChar: Char = '\\'): Option[Expression] = {
    if (pattern.contains(escapeChar)) {
      // There are three different situations when the pattern contains escapeChar:
      // 1. pattern contains an invalid escape sequence, e.g. 'm\aca'
      // 2. pattern contains an escaped wildcard character, e.g. 'ma\%ca'
      // 3. pattern contains an escaped escape character, e.g. 'ma\\ca'
      // Although some of these patterns could be optimized if the escapes were handled first,
      // this rule is simply skipped whenever the pattern contains any escapeChar.
      None
    } else {
      pattern match {
        case startsWith(prefix) =>
          Some(StartsWith(input, Literal(prefix)))

        case endsWith(postfix) =>
          Some(EndsWith(input, Literal(postfix)))

        // 'a%a' pattern is basically the same as 'a%' && '%a'.
        // However, the additional `Length` condition is required to prevent 'a' from
        // matching 'a%a'.
        case startsAndEndsWith(prefix, postfix) =>
          val longEnough =
            GreaterThanOrEqual(Length(input), Literal(prefix.length + postfix.length))
          val bothEnds =
            And(StartsWith(input, Literal(prefix)), EndsWith(input, Literal(postfix)))
          Some(And(longEnough, bothEnds))

        case contains(infix) =>
          Some(Contains(input, Literal(infix)))

        case equalTo(str) =>
          Some(EqualTo(input, Literal(str)))

        case _ => None
      }
    }
  }

  /**
   * Splits the patterns of a multi-like expression into those that `simplifyLike` can rewrite
   * and those it cannot, then recombines them with And/Or according to the concrete type of
   * `multi`.
   *
   * @param child    the expression every pattern is matched against
   * @param patterns the patterns of the multi-like expression; null entries are never simplified
   * @param multi    the original expression, returned unchanged when nothing can be simplified
   * @return the rewritten predicate, or `multi` itself when no pattern is simplifiable
   */
  private def simplifyMultiLike(
      child: Expression,
      patterns: Seq[UTF8String],
      multi: MultiLikeBase): Expression = {
    // `Option(p)` maps a null pattern to None, so it ends up among the remaining patterns.
    // Note: the default escape character of `simplifyLike` is used here.
    val attempted = patterns.map { p =>
      p -> Option(p).flatMap(nonNull => simplifyLike(child, nonNull.toString))
    }
    val remainPatterns = attempted.collect { case (p, None) => p }
    val replacements = attempted.collect { case (_, Some(simplified)) => simplified }

    if (replacements.isEmpty) {
      multi
    } else {
      multi match {
        case l: LikeAll =>
          val and = buildBalancedPredicate(replacements, And)
          if (remainPatterns.nonEmpty) And(and, l.copy(patterns = remainPatterns)) else and

        case l: NotLikeAll =>
          val and = buildBalancedPredicate(replacements.map(Not(_)), And)
          if (remainPatterns.nonEmpty) And(and, l.copy(patterns = remainPatterns)) else and

        case l: LikeAny =>
          val or = buildBalancedPredicate(replacements, Or)
          if (remainPatterns.nonEmpty) Or(or, l.copy(patterns = remainPatterns)) else or

        case l: NotLikeAny =>
          val or = buildBalancedPredicate(replacements.map(Not(_)), Or)
          if (remainPatterns.nonEmpty) Or(or, l.copy(patterns = remainPatterns)) else or
      }
    }
  }

  /**
   * Applies the simplification to every expression of the LIKE family in `plan`.
   *
   * NOTE(review): `LIKE_FAMLIY` is spelled exactly as the identifier is referenced in the
   * original code; it is declared outside this block, so it must not be "corrected" here.
   */
  def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressionsWithPruning(
    _.containsPattern(LIKE_FAMLIY), ruleId) {
    case l @ Like(input, Literal(pattern, StringType), escapeChar) =>
      if (pattern == null) {
        // If pattern is null, return null value directly, since "col like null" == null.
        Literal(null, BooleanType)
      } else {
        simplifyLike(input, pattern.toString, escapeChar).getOrElse(l)
      }

    // The multi-like rewrites reference `child` once per simplified pattern, so they are only
    // applied when `CollapseProject.isCheap(child)` holds.
    case l @ LikeAll(child, patterns) if CollapseProject.isCheap(child) =>
      simplifyMultiLike(child, patterns, l)

    case l @ NotLikeAll(child, patterns) if CollapseProject.isCheap(child) =>
      simplifyMultiLike(child, patterns, l)

    case l @ LikeAny(child, patterns) if CollapseProject.isCheap(child) =>
      simplifyMultiLike(child, patterns, l)

    case l @ NotLikeAny(child, patterns) if CollapseProject.isCheap(child) =>
      simplifyMultiLike(child, patterns, l)
  }
}
测试
  // Runs a NOT LIKE query whose pattern '%HotFocus%' has the shape of the `contains` regex
  // ("%([^_%]+)%") in LikeSimplification, so the inner LIKE is a candidate for being rewritten
  // to a Contains predicate (presumably visible in the printed plan; verify with explain).
  test("test data, force apply AQE") {
    withSQLConf(
      SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
      SQLConf.ADAPTIVE_EXECUTION_FORCE_APPLY.key -> "true") {
      val df = sql("SELECT * FROM testData where value not like '%HotFocus%'")
      // The two calls were fused into `df.showdf.printSchema()` in the pasted text.
      df.show()
      df.printSchema()
    }
  }

在这里插入图片描述

  // Same query as the single-% variant, but with doubled wildcards. '%%HotFocus%%' matches none
  // of the LikeSimplification regexes: each of them requires at least one non-wildcard
  // character directly after the leading '%' (or no '%' at all), so `simplifyLike` returns None
  // and the LIKE expression is kept as is.
  test("test data like, force apply AQE") {
    withSQLConf(
      SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
      SQLConf.ADAPTIVE_EXECUTION_FORCE_APPLY.key -> "true") {
      val df = sql("SELECT * FROM testData where value not like '%%HotFocus%%'")
      // The two calls were fused into `df.showdf.printSchema()` in the pasted text.
      df.show()
      df.printSchema()
    }
  }

在这里插入图片描述

http://www.lryc.cn/news/587496.html

相关文章:

  • SpringMVC4
  • UI前端与数字孪生结合实践探索:智慧物流的仓储自动化管理系统
  • pycharm恢复出厂设置,可以解决大多数pycharm存在的问题
  • 创建自定义Dataset类与多分类问题实战
  • 怎么解决数据库幻读问题
  • 【图片识别改名】水印相机拍的照片如何将照片的名字批量改为水印内容?图片识别改名的详细步骤和注意事项
  • 设计模式笔记_结构型_桥接模式
  • vscode 安装 esp ide环境
  • 基于MATLAB的LSTM长短期记忆神经网络的数据回归预测方法应用
  • 02 51单片机之LED闪烁
  • 前端同学,你能不能别再往后端传一个巨大的JSON了?
  • 构建完整工具链:GCC/G++ + Makefile + Git 自动化开发流程
  • 前端接入海康威视摄像头的三种方案
  • autoware激光雷达和相机标定
  • JAVA 设计模式 工厂
  • Docker搭建Redis分片集群
  • 鸿蒙应用开发: 鸿蒙项目中使用私有 npm 插件的完整流程
  • Kotlin集合接口
  • 常用的OTP语音芯片有哪些?
  • 前端性能与可靠性工程系列: 渲染、缓存与关键路径优化
  • Spring Boot - Spring Boot 集成 MyBatis 分页实现 PageHelper
  • 【React Native】环境变量和封装 fetch
  • 智源:LLM指令数据建设框架
  • VR样板间:房产营销新变革
  • Cesium 9 ,Cesium 离线地图本地实现与服务器部署( Vue + Cesium 多项目共享离线地图切片部署实践 )
  • 谷歌开源库gtest 框架安装与使用
  • VR全景制作流程?什么是全景?
  • ELK、Loki、Kafka 三种日志告警联动方案全解析(附实战 Demo)
  • EVOLVEpro安装使用教程-蛋白质语言模型驱动的快速定向进化
  • 快速搭建Maven仓库服务