当前位置: 首页 > article >正文

UserAgent包名识别工具

提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档

文章目录

  • 背景
  • 具体实现


背景

为了更准确地分析用户下单行为的来源渠道,并实现精细化运营与风险控制,我们希望在用户下单时,能够通过请求中的 User-Agent(UA)信息提取其使用的客户端 App 的包名(Package Name)。

具体实现

import com.ejoined.commons.plugin.utils.StringUtils;
import java.net.URLDecoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;/*** UserAgent包名识别工具*/
/**
 * Extracts the package name of the client app from a User-Agent string.
 *
 * <p>Resolution runs through five stages, from most to least specific, and returns the
 * first candidate that is a syntactically valid package name and is not a known browser:
 * <ol>
 *   <li>suffix path ({@code .../<digits>/<package>}),</li>
 *   <li>predefined keyword library,</li>
 *   <li>app-store format ({@code (<package>; <version>)}),</li>
 *   <li>heuristics ({@code <package>/<version>} tokens and bracket contents),</li>
 *   <li>global search for any embedded package-like token.</li>
 * </ol>
 */
public class ComprehensiveUserAgentParserUtil {

    /** Coarse grouping of the predefined apps. */
    private enum AppCategory { SOCIAL, ECOMMERCE, FOOD_DELIVERY, VIDEO, UTILITY }

    /** Predefined app library (keyword -> app info); insertion-ordered so iteration is deterministic. */
    private static final Map<String, AppInfo> PREDEFINED_APP_LIBRARY = new LinkedHashMap<>();

    /** Browser package names; a candidate equal to one of these is treated as a false match. */
    private static final Set<String> BROWSER_PACKAGE_SET = new HashSet<>(Arrays.asList(
            "com.android.chrome", "com.UCMobile", "com.tencent.mtt", "com.apple.mobilesafari"));

    /** Whole-string check: dot-separated identifiers with at least two dots. */
    private static final Pattern STRICT_PACKAGE_PATTERN =
            Pattern.compile("^[a-zA-Z_][a-zA-Z0-9_]*(\\.[a-zA-Z_][a-zA-Z0-9_]*){2,}$");

    /**
     * Unanchored counterpart of {@link #STRICT_PACKAGE_PATTERN}, used to search inside a
     * longer string. The look-arounds stop a match from starting or ending mid-identifier.
     */
    private static final Pattern EMBEDDED_PACKAGE_PATTERN = Pattern.compile(
            "(?<![a-zA-Z0-9_.])[a-zA-Z_][a-zA-Z0-9_]*(?:\\.[a-zA-Z_][a-zA-Z0-9_]*){2,}(?![a-zA-Z0-9_])");

    /** {@code /<digits>/<token>} followed by whitespace or end of input. */
    private static final Pattern SUFFIX_PACKAGE_PATTERN =
            Pattern.compile("/\\d+/([\\w\\.]+)(?:\\s|$)");

    /** {@code (<token>;} where the token contains neither ';' nor parentheses. */
    private static final Pattern APP_STORE_PATTERN = Pattern.compile("\\(([^;()]+);");

    /** {@code <token>/<version>}. */
    private static final Pattern VERSIONED_TOKEN_PATTERN =
            Pattern.compile("([a-zA-Z_][a-zA-Z0-9_\\.]*)/[0-9\\.]+");

    /** Content of one pair of parentheses. */
    private static final Pattern BRACKET_PATTERN = Pattern.compile("\\(([^)]+)\\)");

    /** Separators used inside bracket content. */
    private static final Pattern BRACKET_SEPARATOR_PATTERN = Pattern.compile("[;, ]");

    /**
     * System words that must not appear as the first segment of a package name.
     * Stored lower-case; lookups lower-case the segment so the comparison ignores case.
     */
    private static final Set<String> SYSTEM_WORD_BLACKLIST = new HashSet<>(Arrays.asList(
            "linux", "android", "iphone", "ipad", "windows", "mac", "ios", "cpu", "build",
            "nettype", "language", "version", "mobile", "safari", "webkit", "wv", "khtml"));

    /**
     * Predefined apps ordered by keyword length, longest first, so that a more specific
     * keyword ("KwaiLite", "baidu.waimai", "MicroMessenger") is tried before a shorter one
     * that also matches the same text ("Kwai", "waimai", "QQ" inside "MQQBrowser").
     */
    private static final List<AppInfo> APPS_BY_SPECIFICITY;

    static {
        // Social apps (Chinese and English keywords).
        registerApp("微信", "MicroMessenger", "com.tencent.mm", AppCategory.SOCIAL);
        registerApp("微信", "微信", "com.tencent.mm", AppCategory.SOCIAL);
        registerApp("微信", "WeChat", "com.tencent.mm", AppCategory.SOCIAL);
        registerApp("QQ", "QQ", "com.tencent.mobileqq", AppCategory.SOCIAL);
        registerApp("微博", "Weibo", "com.sina.weibo", AppCategory.SOCIAL);
        // NOTE(review): the Kwai / KwaiLite package names are kept exactly as originally
        // registered; verify them against the published app packages.
        registerApp("快手", "Kwai", "com.kuaishou.nebula", AppCategory.SOCIAL);
        registerApp("快手极速版", "KwaiLite", "com.kuaishou.nebula.lite", AppCategory.SOCIAL);
        registerApp("小红书", "xingin", "com.xingin.xhs", AppCategory.SOCIAL);
        registerApp("知乎", "Zhihu", "com.zhihu.android", AppCategory.SOCIAL);

        // E-commerce apps.
        registerApp("淘宝", "AliApp(TB", "com.taobao.taobao", AppCategory.ECOMMERCE);
        registerApp("天猫", "AliApp(TM", "com.tmall.wireless", AppCategory.ECOMMERCE);
        registerApp("京东", "JD4iPhone", "com.jingdong.app.mall", AppCategory.ECOMMERCE);
        registerApp("拼多多", "pinduoduo", "com.xunmeng.pinduoduo", AppCategory.ECOMMERCE);
        registerApp("亚马逊", "Amazon", "com.amazon.mShop.android", AppCategory.ECOMMERCE);
        registerApp("唯品会", "vipshop", "com.achievo.vipshop", AppCategory.ECOMMERCE);

        // Food-delivery apps.
        registerApp("美团", "waimai", "com.sankuai.meituan", AppCategory.FOOD_DELIVERY);
        registerApp("饿了么", "Eleme", "me.ele", AppCategory.FOOD_DELIVERY);
        registerApp("百度外卖", "baidu.waimai", "com.baidu.waimai", AppCategory.FOOD_DELIVERY);

        // Video apps.
        registerApp("抖音", "Aweme", "com.ss.android.ugc.aweme", AppCategory.VIDEO);
        registerApp("TikTok", "com.zhiliaoapp.musically", "com.zhiliaoapp.musically", AppCategory.VIDEO);
        registerApp("爱奇艺", "IQIYI", "com.qiyi.video", AppCategory.VIDEO);
        registerApp("B站", "BiliBili", "tv.danmaku.bili", AppCategory.VIDEO);
        registerApp("优酷", "Youku", "com.youku.phone", AppCategory.VIDEO);
        registerApp("腾讯视频", "TencentVideo", "com.tencent.qqlive", AppCategory.VIDEO);

        // Utility apps. Chrome is deliberately not registered: its package is listed in
        // BROWSER_PACKAGE_SET and would be discarded by the browser filter anyway.
        registerApp("支付宝", "AlipayClient", "com.eg.android.AlipayGphone", AppCategory.UTILITY);
        registerApp("华为应用商店", "com.huawei.appmarket", "com.huawei.appmarket", AppCategory.UTILITY);

        List<AppInfo> ordered = new ArrayList<>(PREDEFINED_APP_LIBRARY.values());
        // List.sort is stable, so keywords of equal length keep their registration order.
        ordered.sort(Comparator.comparingInt((AppInfo info) -> info.keyword.length()).reversed());
        APPS_BY_SPECIFICITY = Collections.unmodifiableList(ordered);
    }

    /** Immutable description of one predefined app and the pattern that detects it. */
    private static class AppInfo {
        final String appName;
        final String keyword;
        final String packageName;
        final AppCategory category;
        /** Keyword (literal, case-insensitive), an optional '/', then at least one non-space, non-parenthesis char. */
        final Pattern matchPattern;

        AppInfo(String appName, String keyword, String packageName, AppCategory category) {
            this.appName = appName;
            this.keyword = keyword;
            this.packageName = packageName;
            this.category = category;
            this.matchPattern = Pattern.compile(
                    Pattern.quote(keyword) + "/?([^\\s\\(\\)]+)", Pattern.CASE_INSENSITIVE);
        }
    }

    /** Registers an app under {@code keyword}; a later registration of the same keyword replaces the earlier one. */
    private static void registerApp(String appName, String keyword, String packageName, AppCategory category) {
        PREDEFINED_APP_LIBRARY.put(keyword, new AppInfo(appName, keyword, packageName, category));
    }

    /**
     * Main entry point: extracts the app package name from a User-Agent string.
     *
     * @param userAgent the User-Agent string; may be {@code null}
     * @return the resolved package name, or an empty string when the input is blank,
     *         nothing matches, or the match is a known browser package
     */
    public static String extractPackageName(String userAgent) {
        if (isBlank(userAgent)) {
            return "";
        }

        // 1. Suffix path, e.g. ".../<digits>/<package>".
        String packageName = parseSuffixPackage(userAgent);
        if (isAcceptable(packageName)) {
            return packageName;
        }

        // 2. Predefined keyword library.
        packageName = matchPredefinedApps(userAgent);
        if (isAcceptable(packageName)) {
            return packageName;
        }

        // 3. App-store format, e.g. "(com.huawei.appmarket; <version>)".
        packageName = parseAppStoreFormatPackage(userAgent);
        if (isAcceptable(packageName)) {
            return packageName;
        }

        // 4. Heuristics: "<package>/<version>" tokens and bracket contents.
        packageName = parseHeuristicPackage(userAgent);
        if (isAcceptable(packageName)) {
            return packageName;
        }

        // 5. Last resort: any package-like token anywhere in the string.
        packageName = parseStrictModePackage(userAgent);
        return isAcceptable(packageName) ? packageName : "";
    }

    /** Returns whether a stage result is non-blank and not a known browser package. */
    private static boolean isAcceptable(String packageName) {
        return !isBlank(packageName) && !isBrowserPackage(packageName);
    }

    /** Blank check ({@code null}, empty, or whitespace only) implemented with the JDK alone. */
    private static boolean isBlank(String value) {
        if (value == null) {
            return true;
        }
        for (int i = 0; i < value.length(); i++) {
            if (!Character.isWhitespace(value.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * URL-decodes the User-Agent as UTF-8. A User-Agent is not guaranteed to be
     * URL-encoded, so a malformed escape sequence falls back to the raw string instead
     * of disabling the stage that uses the result.
     */
    private static String decodeOrRaw(String userAgent) {
        try {
            return URLDecoder.decode(userAgent, "UTF-8");
        } catch (java.io.UnsupportedEncodingException | IllegalArgumentException ignored) {
            return userAgent;
        }
    }

    /**
     * Parses a package name from a suffix path such as
     * {@code open_news_u_s/6817/cn.copper.fokapi.mysterious}.
     *
     * @return the first valid candidate, or an empty string
     */
    private static String parseSuffixPackage(String userAgent) {
        Matcher matcher = SUFFIX_PACKAGE_PATTERN.matcher(decodeOrRaw(userAgent));
        while (matcher.find()) {
            String candidate = matcher.group(1);
            if (isValidPackage(candidate)) {
                return candidate;
            }
        }
        return "";
    }

    /**
     * Matches the User-Agent against the predefined library, most specific keyword first.
     *
     * @return the package name of the first matching app, or an empty string
     */
    private static String matchPredefinedApps(String userAgent) {
        for (AppInfo appInfo : APPS_BY_SPECIFICITY) {
            if (appInfo.matchPattern.matcher(userAgent).find()) {
                return appInfo.packageName;
            }
        }
        return "";
    }

    /**
     * Parses the app-store format {@code (<package>; <version>)}. Every
     * {@code (<token>;} occurrence is examined, because the first one in a typical
     * User-Agent is the platform token (e.g. {@code (Linux;}).
     *
     * @return the first valid candidate, or an empty string
     */
    private static String parseAppStoreFormatPackage(String userAgent) {
        Matcher matcher = APP_STORE_PATTERN.matcher(userAgent);
        while (matcher.find()) {
            String candidate = matcher.group(1).trim();
            if (isValidPackage(candidate)) {
                return candidate;
            }
        }
        return "";
    }

    /**
     * Heuristic parsing: first {@code <package>/<version>} tokens, then the individual
     * parts of every bracketed section.
     *
     * @return the first valid candidate, or an empty string
     */
    private static String parseHeuristicPackage(String userAgent) {
        // Mode 1: "<package>/<version>". All tokens are examined, since the first
        // "<name>/<version>" token in a User-Agent is typically "Mozilla/5.0".
        Matcher versionMatcher = VERSIONED_TOKEN_PATTERN.matcher(userAgent);
        while (versionMatcher.find()) {
            String candidate = versionMatcher.group(1);
            if (isValidPackage(candidate)) {
                return candidate;
            }
        }

        // Mode 2: parts inside parentheses, separated by ';', ',' or a space.
        Matcher bracketMatcher = BRACKET_PATTERN.matcher(userAgent);
        while (bracketMatcher.find()) {
            for (String part : BRACKET_SEPARATOR_PATTERN.split(bracketMatcher.group(1))) {
                if (isValidPackage(part)) {
                    return part;
                }
            }
        }
        return "";
    }

    /**
     * Searches the whole User-Agent for package-like tokens and returns the longest
     * valid one.
     *
     * @return the longest valid candidate, or an empty string
     */
    private static String parseStrictModePackage(String userAgent) {
        // The anchored STRICT_PACKAGE_PATTERN can only match an entire string, so the
        // search uses its unanchored counterpart.
        Matcher matcher = EMBEDDED_PACKAGE_PATTERN.matcher(userAgent);
        String longest = "";
        while (matcher.find()) {
            String candidate = matcher.group();
            if (candidate.length() > longest.length() && isValidPackage(candidate)) {
                longest = candidate;
            }
        }
        return longest;
    }

    /**
     * Validates a candidate: at least 6 characters, matches
     * {@link #STRICT_PACKAGE_PATTERN}, and its first segment is not a blacklisted
     * system word (compared case-insensitively).
     */
    private static boolean isValidPackage(String packageName) {
        if (packageName == null || packageName.length() < 6) {
            return false;
        }
        if (!STRICT_PACKAGE_PATTERN.matcher(packageName).matches()) {
            return false;
        }
        String firstSegment = packageName.substring(0, packageName.indexOf('.'));
        return !SYSTEM_WORD_BLACKLIST.contains(firstSegment.toLowerCase(Locale.ROOT));
    }

    /** Returns whether the package name is one of the known browser packages. */
    private static boolean isBrowserPackage(String packageName) {
        return BROWSER_PACKAGE_SET.contains(packageName);
    }
}
http://www.lryc.cn/news/2404918.html

相关文章:

  • 96.如何使用C#实现串口发送? C#例子
  • 【工具使用】STM32CubeMX-FreeRTOS操作系统-信号标志、互斥锁、信号量篇
  • [P2P]并发模式
  • Cloudflare 免费域名邮箱 支持 Catch-all 无限别名收件
  • 大数据Spark(六十一):Spark基于Standalone提交任务流程
  • 学习记录:DAY32
  • next,react封装axios,http请求
  • 元图CAD:一键解锁PDF转CAD,OCR技术赋能高效转换
  • Android 平台RTSP/RTMP播放器SDK接入说明
  • Nodejs工程化实践:构建高性能前后端交互系统
  • STM32什么是寄存器
  • Linux 的 find 命令使用指南
  • 第六个微信小程序:教师工具集
  • 记录一个用了很久的git提交到github和gitee比较方便的方法
  • Qt Qml模块功能及功能解析
  • 前端八股之JS的原型链
  • NLP学习路线图(二十九):BERT及其变体
  • 机器翻译模型笔记
  • Ref vs. Reactive:Vue 3 响应式变量的最佳选择指南
  • 让视觉基础模型(VFMs)像大语言模型(LLMs)一样“会思考”​
  • 现代前端框架的发展与演进
  • 【LLM-Agent】智能体的记忆缓存设计
  • 一起学Spring AI:核心概念
  • Oracle业务用户的存储过程个数及行数统计
  • PicSharp(图片压缩工具) v1.1.6
  • 前端文件下载常用方式详解
  • 【DAY42】Grad-CAM与Hook函数
  • 如何生成和制作PDF文件
  • 【K8S系列】Kubernetes 中 Pod(Java服务)启动缓慢的深度分析与解决方案
  • 【Java学习笔记】StringBuilder类(重点)