/**
 * Simplifies LIKE expressions that do not need full regular expressions to evaluate the condition.
 * For example, when the expression is just checking to see if a string starts with a given
 * pattern.
 *
 * Recognised pattern shapes (where `x` and `y` contain neither `_` nor `%`):
 *  - `x%`  is rewritten to `StartsWith`
 *  - `%x`  is rewritten to `EndsWith`
 *  - `x%y` is rewritten to a length check combined with `StartsWith` and `EndsWith`
 *  - `%x%` is rewritten to `Contains`
 *  - `x`   is rewritten to `EqualTo`
 *
 * A run of consecutive `%` wildcards matches exactly the same strings as a single `%`, so each
 * wildcard position above also accepts repeated `%` (e.g. `'%%x%%'` is treated like `'%x%'`).
 */
object LikeSimplification extends Rule[LogicalPlan] with PredicateHelper {
  // The escape-character guard in `simplifyLike` protects from escapes on trailing %.
  // Cases like "something\%" are not optimized, but this does not affect correctness.
  //
  // `%+` (rather than a single `%`) is used for every wildcard position because consecutive
  // `%` wildcards are equivalent to one. The captured groups still exclude `_` and `%`.
  private val startsWith = "([^_%]+)%+".r
  private val endsWith = "%+([^_%]+)".r
  private val startsAndEndsWith = "([^_%]+)%+([^_%]+)".r
  private val contains = "%+([^_%]+)%+".r
  private val equalTo = "([^_%]*)".r

  /**
   * Tries to replace a single LIKE pattern with a cheaper string predicate.
   *
   * @param input      the expression being matched against the pattern
   * @param pattern    the LIKE pattern text (must be non-null)
   * @param escapeChar the escape character in effect for this pattern
   * @return the replacement predicate, or `None` when the pattern is not one of the
   *         recognised shapes or contains the escape character
   */
  private def simplifyLike(
      input: Expression, pattern: String, escapeChar: Char = '\\'): Option[Expression] = {
    if (pattern.contains(escapeChar)) {
      // There are three different situations when pattern containing escapeChar:
      // 1. pattern contains invalid escape sequence, e.g. 'm\aca'
      // 2. pattern contains escaped wildcard character, e.g. 'ma\%ca'
      // 3. pattern contains escaped escape character, e.g. 'ma\\ca'
      // Although there are patterns can be optimized if we handle the escape first, we just
      // skip this rule if pattern contains any escapeChar for simplicity.
      None
    } else {
      pattern match {
        case startsWith(prefix) =>
          Some(StartsWith(input, Literal(prefix)))
        case endsWith(postfix) =>
          Some(EndsWith(input, Literal(postfix)))
        // 'a%a' pattern is basically same with 'a%' && '%a'.
        // However, the additional `Length` condition is required to prevent 'a' match 'a%a'.
        case startsAndEndsWith(prefix, postfix) =>
          Some(And(GreaterThanOrEqual(Length(input), Literal(prefix.length + postfix.length)),
            And(StartsWith(input, Literal(prefix)), EndsWith(input, Literal(postfix)))))
        case contains(infix) =>
          Some(Contains(input, Literal(infix)))
        case equalTo(str) =>
          Some(EqualTo(input, Literal(str)))
        case _ => None
      }
    }
  }

  /**
   * Simplifies a multi-pattern LIKE expression (LIKE ALL / NOT LIKE ALL / LIKE ANY /
   * NOT LIKE ANY) by rewriting every pattern that `simplifyLike` can handle and keeping the
   * remaining patterns (including null patterns) in a copy of the original expression.
   *
   * @param child    the expression being matched
   * @param patterns the patterns of the multi-like expression
   * @param multi    the original multi-like expression
   * @return `multi` unchanged when no pattern could be simplified; otherwise the rewritten
   *         predicates combined with whatever part of `multi` is left
   */
  private def simplifyMultiLike(
      child: Expression, patterns: Seq[UTF8String], multi: MultiLikeBase): Expression = {
    // Pair each pattern with its simplified form; null patterns are never simplified.
    val attempts = patterns.map { p =>
      p -> Option(p).flatMap(nonNull => simplifyLike(child, nonNull.toString))
    }
    val remainPatterns = attempts.collect { case (p, None) => p }
    val replacements = attempts.collect { case (_, Some(simplified)) => simplified }

    if (replacements.isEmpty) {
      multi
    } else {
      multi match {
        case l: LikeAll =>
          val and = buildBalancedPredicate(replacements, And)
          if (remainPatterns.nonEmpty) And(and, l.copy(patterns = remainPatterns)) else and
        case l: NotLikeAll =>
          val and = buildBalancedPredicate(replacements.map(Not(_)), And)
          if (remainPatterns.nonEmpty) And(and, l.copy(patterns = remainPatterns)) else and
        case l: LikeAny =>
          val or = buildBalancedPredicate(replacements, Or)
          if (remainPatterns.nonEmpty) Or(or, l.copy(patterns = remainPatterns)) else or
        case l: NotLikeAny =>
          val or = buildBalancedPredicate(replacements.map(Not(_)), Or)
          if (remainPatterns.nonEmpty) Or(or, l.copy(patterns = remainPatterns)) else or
      }
    }
  }

  /**
   * Applies the simplification to every LIKE-family expression in the plan. Multi-like
   * expressions are only rewritten when `CollapseProject.isCheap(child)` holds, since the
   * rewrite references `child` once per simplified pattern.
   */
  def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressionsWithPruning(
    _.containsPattern(LIKE_FAMLIY), ruleId) {
    case l @ Like(input, Literal(pattern, StringType), escapeChar) =>
      if (pattern == null) {
        // If pattern is null, return null value directly, since "col like null" == null.
        Literal(null, BooleanType)
      } else {
        simplifyLike(input, pattern.toString, escapeChar).getOrElse(l)
      }
    case l @ LikeAll(child, patterns) if CollapseProject.isCheap(child) =>
      simplifyMultiLike(child, patterns, l)
    case l @ NotLikeAll(child, patterns) if CollapseProject.isCheap(child) =>
      simplifyMultiLike(child, patterns, l)
    case l @ LikeAny(child, patterns) if CollapseProject.isCheap(child) =>
      simplifyMultiLike(child, patterns, l)
    case l @ NotLikeAny(child, patterns) if CollapseProject.isCheap(child) =>
      simplifyMultiLike(child, patterns, l)
  }
}
// Tests
// Runs a NOT LIKE filter with a single-`%` "contains" pattern against `testData` while
// adaptive query execution is both enabled and force-applied. The test only displays the
// result and its schema; it makes no assertions.
test("test data, force apply AQE") {
  withSQLConf(
    SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
    SQLConf.ADAPTIVE_EXECUTION_FORCE_APPLY.key -> "true") {
    val query = "SELECT * FROM testData where value not like '%HotFocus%'"
    val filtered = sql(query)
    filtered.show()
    filtered.printSchema()
  }
}
// Same scenario as the single-`%` NOT LIKE test, but the pattern wraps the literal in doubled
// `%%` wildcards. Adaptive query execution is enabled and force-applied. The test only
// displays the result and its schema; it makes no assertions.
test("test data like, force apply AQE") {
  withSQLConf(
    SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
    SQLConf.ADAPTIVE_EXECUTION_FORCE_APPLY.key -> "true") {
    val query = "SELECT * FROM testData where value not like '%%HotFocus%%'"
    val filtered = sql(query)
    filtered.show()
    filtered.printSchema()
  }
}