UserAgent包名识别工具
提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
文章目录
- 背景
- 具体实现
背景
为了更准确地分析用户下单行为的来源渠道,并实现精细化运营与风险控制,我们希望在用户下单时,能够通过请求中的 User-Agent(UA)信息提取其使用的客户端 App 的包名(Package Name)。
具体实现
import com.ejoined.commons.plugin.utils.StringUtils;
import java.net.URLDecoder;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;/*** UserAgent包名识别工具*/
/**
 * Extracts the client application's package name from an HTTP User-Agent string.
 *
 * <p>Resolution runs through five stages, most specific first, and stops at the first stage
 * that yields a syntactically valid, non-browser package name:
 * <ol>
 *   <li>suffix path: {@code .../<digits>/<package>} at the end of a token;</li>
 *   <li>predefined keyword library (e.g. {@code MicroMessenger} -> {@code com.tencent.mm});</li>
 *   <li>app-store format: {@code (<package>; <version>)};</li>
 *   <li>heuristics: {@code <package>/<version>} tokens and parenthesised content;</li>
 *   <li>fallback: the longest package-shaped token anywhere in the string.</li>
 * </ol>
 *
 * <p>All state is built once in static initialisers and never modified afterwards.
 */
public final class ComprehensiveUserAgentParserUtil {

    /** Coarse application category recorded for each registered app. */
    private enum AppCategory { SOCIAL, ECOMMERCE, FOOD_DELIVERY, VIDEO, UTILITY }

    /**
     * Predefined application library (keyword -> application info).
     * Insertion-ordered so that lookups are deterministic across JVM runs.
     */
    private static final Map<String, AppInfo> PREDEFINED_APP_LIBRARY = new LinkedHashMap<>();

    /** Browser package names; a browser is never reported as the ordering app. */
    private static final Set<String> BROWSER_PACKAGE_SET = Collections.unmodifiableSet(
            new HashSet<>(Arrays.asList(
                    "com.android.chrome", "com.UCMobile", "com.tencent.mtt", "com.apple.mobilesafari")));

    /**
     * System / engine words that must not be the first segment of a package name.
     * Stored lower-cased; lookups lower-case the probe so the comparison is case-insensitive.
     */
    private static final Set<String> SYSTEM_WORD_BLACKLIST = lowerCaseSet(
            "Linux", "Android", "iPhone", "iPad", "Windows", "Mac", "iOS", "CPU", "Build",
            "NetType", "Language", "Version", "Mobile", "Safari", "WebKit", "wv", "KHTML");

    /** Shape of a package name: identifier segments separated by dots, at least two dots. */
    private static final String PACKAGE_NAME_REGEX =
            "[a-zA-Z_][a-zA-Z0-9_]*(\\.[a-zA-Z_][a-zA-Z0-9_]*){2,}";

    /** Anchored form: the whole candidate must be a package name. */
    private static final Pattern STRICT_PACKAGE_PATTERN =
            Pattern.compile("^" + PACKAGE_NAME_REGEX + "$");

    /**
     * Unanchored form for scanning inside a User-Agent. The look-behind prevents a match from
     * starting in the middle of a token (after a word character or a dot).
     */
    private static final Pattern EMBEDDED_PACKAGE_PATTERN =
            Pattern.compile("(?<![\\w.])" + PACKAGE_NAME_REGEX);

    /** {@code /<digits>/<candidate>} followed by whitespace or end of input. */
    private static final Pattern SUFFIX_PACKAGE_PATTERN =
            Pattern.compile("/\\d+/([\\w\\.]+)(?:\\s|$)");

    /**
     * {@code (<candidate>;}. Parentheses are excluded from the capture so that one match can
     * not run across a closing bracket and swallow the next group.
     */
    private static final Pattern APP_STORE_PATTERN = Pattern.compile("\\(([^;()]+);");

    /** {@code <candidate>/<version>} token. */
    private static final Pattern VERSIONED_TOKEN_PATTERN =
            Pattern.compile("([a-zA-Z_][a-zA-Z0-9_\\.]*)/[0-9\\.]+", Pattern.CASE_INSENSITIVE);

    /** Content of one parenthesised group. */
    private static final Pattern BRACKET_CONTENT_PATTERN = Pattern.compile("\\(([^)]+)\\)");

    /** Separators between the parts of a parenthesised group. */
    private static final Pattern BRACKET_PART_SEPARATOR = Pattern.compile("[;, ]");

    static {
        // Social apps (Chinese and English keywords are both supported).
        registerApp("微信", "MicroMessenger", "com.tencent.mm", AppCategory.SOCIAL);
        registerApp("微信", "微信", "com.tencent.mm", AppCategory.SOCIAL);
        registerApp("微信", "WeChat", "com.tencent.mm", AppCategory.SOCIAL);
        registerApp("QQ", "QQ", "com.tencent.mobileqq", AppCategory.SOCIAL);
        registerApp("微博", "Weibo", "com.sina.weibo", AppCategory.SOCIAL);
        registerApp("快手", "Kwai", "com.kuaishou.nebula", AppCategory.SOCIAL);
        registerApp("快手极速版", "KwaiLite", "com.kuaishou.nebula.lite", AppCategory.SOCIAL);
        registerApp("小红书", "xingin", "com.xingin.xhs", AppCategory.SOCIAL);
        registerApp("知乎", "Zhihu", "com.zhihu.android", AppCategory.SOCIAL);

        // E-commerce apps.
        registerApp("淘宝", "AliApp(TB", "com.taobao.taobao", AppCategory.ECOMMERCE);
        registerApp("天猫", "AliApp(TM", "com.tmall.wireless", AppCategory.ECOMMERCE);
        registerApp("京东", "JD4iPhone", "com.jingdong.app.mall", AppCategory.ECOMMERCE);
        registerApp("拼多多", "pinduoduo", "com.xunmeng.pinduoduo", AppCategory.ECOMMERCE);
        registerApp("亚马逊", "Amazon", "com.amazon.mShop.android", AppCategory.ECOMMERCE);
        registerApp("唯品会", "vipshop", "com.achievo.vipshop", AppCategory.ECOMMERCE);

        // Food-delivery apps.
        registerApp("美团", "waimai", "com.sankuai.meituan", AppCategory.FOOD_DELIVERY);
        registerApp("饿了么", "Eleme", "me.ele", AppCategory.FOOD_DELIVERY);
        registerApp("百度外卖", "baidu.waimai", "com.baidu.waimai", AppCategory.FOOD_DELIVERY);

        // Video apps.
        registerApp("抖音", "Aweme", "com.ss.android.ugc.aweme", AppCategory.VIDEO);
        registerApp("TikTok", "com.zhiliaoapp.musically", "com.zhiliaoapp.musically", AppCategory.VIDEO);
        registerApp("爱奇艺", "IQIYI", "com.qiyi.video", AppCategory.VIDEO);
        registerApp("B站", "BiliBili", "tv.danmaku.bili", AppCategory.VIDEO);
        registerApp("优酷", "Youku", "com.youku.phone", AppCategory.VIDEO);
        registerApp("腾讯视频", "TencentVideo", "com.tencent.qqlive", AppCategory.VIDEO);

        // Utility apps. Chrome is deliberately not registered: almost every Android WebView
        // User-Agent carries a "Chrome/<version>" token, so it would match nearly everything.
        registerApp("支付宝", "AlipayClient", "com.eg.android.AlipayGphone", AppCategory.UTILITY);
        registerApp("华为应用商店", "com.huawei.appmarket", "com.huawei.appmarket", AppCategory.UTILITY);
    }

    /** Static utility class; not instantiable. */
    private ComprehensiveUserAgentParserUtil() {
    }

    /** Immutable description of one registered application. */
    private static final class AppInfo {
        final String appName;
        final String keyword;
        final String packageName;
        final AppCategory category;
        /** The literal keyword, an optional slash, then at least one non-space, non-bracket char. */
        final Pattern matchPattern;

        AppInfo(String appName, String keyword, String packageName, AppCategory category) {
            this.appName = appName;
            this.keyword = keyword;
            this.packageName = packageName;
            this.category = category;
            this.matchPattern = Pattern.compile(
                    Pattern.quote(keyword) + "/?([^\\s\\(\\)]+)", Pattern.CASE_INSENSITIVE);
        }
    }

    /**
     * Registers an application under a User-Agent keyword. Registering the same keyword again
     * replaces the earlier entry.
     */
    private static void registerApp(String appName, String keyword, String packageName, AppCategory category) {
        PREDEFINED_APP_LIBRARY.put(keyword, new AppInfo(appName, keyword, packageName, category));
    }

    /** Builds an unmodifiable set holding the lower-cased form of every given word. */
    private static Set<String> lowerCaseSet(String... words) {
        Set<String> result = new HashSet<>();
        for (String word : words) {
            result.add(word.toLowerCase(Locale.ROOT));
        }
        return Collections.unmodifiableSet(result);
    }

    /**
     * Main entry point: extracts the application package name from a User-Agent.
     *
     * @param userAgent the raw User-Agent header value; may be {@code null}
     * @return the detected package name, or an empty string when nothing matched
     */
    public static String extractPackageName(String userAgent) {
        if (isBlank(userAgent)) {
            return "";
        }

        // 1. Suffix path, e.g. ".../<digits>/<package>".
        String packageName = parseSuffixPackage(userAgent);
        if (isReportable(packageName)) {
            return packageName;
        }

        // 2. Predefined keyword library.
        packageName = matchPredefinedApps(userAgent);
        if (isReportable(packageName)) {
            return packageName;
        }

        // 3. App-store format, e.g. "(com.huawei.appmarket; 11.0.0)".
        packageName = parseAppStoreFormatPackage(userAgent);
        if (isReportable(packageName)) {
            return packageName;
        }

        // 4. Heuristics: "<package>/<version>" tokens and bracket content.
        packageName = parseHeuristicPackage(userAgent);
        if (isReportable(packageName)) {
            return packageName;
        }

        // 5. Fallback: any package-shaped token in the string.
        packageName = parseStrictModePackage(userAgent);
        return isReportable(packageName) ? packageName : "";
    }

    /**
     * Parses a package name from a suffix path such as
     * {@code open_news_u_s/6817/cn.copper.fokapi.mysterious}.
     */
    private static String parseSuffixPackage(String userAgent) {
        return firstUsableCandidate(SUFFIX_PACKAGE_PATTERN.matcher(urlDecodeOrRaw(userAgent)));
    }

    /**
     * URL-decodes the User-Agent as UTF-8. A User-Agent is free text, so a stray {@code %} is
     * not an error: when decoding fails the raw value is used instead of giving up.
     */
    private static String urlDecodeOrRaw(String userAgent) {
        try {
            return URLDecoder.decode(userAgent, "UTF-8");
        } catch (java.io.UnsupportedEncodingException | IllegalArgumentException e) {
            return userAgent;
        }
    }

    /**
     * Matches the User-Agent against the predefined library. When several keywords match
     * (e.g. {@code Kwai} and {@code KwaiLite}, or {@code QQ} inside {@code MQQBrowser} next to
     * {@code MicroMessenger}), the longest keyword wins because it is the most specific one;
     * ties go to the keyword registered first.
     */
    private static String matchPredefinedApps(String userAgent) {
        AppInfo best = null;
        for (AppInfo app : PREDEFINED_APP_LIBRARY.values()) {
            if (best != null && app.keyword.length() <= best.keyword.length()) {
                continue; // cannot beat the current best, skip the regex work
            }
            if (app.matchPattern.matcher(userAgent).find()) {
                best = app;
            }
        }
        return best == null ? "" : best.packageName;
    }

    /** Parses the app-store format, e.g. {@code (com.huawei.appmarket; 11.0.0)}. */
    private static String parseAppStoreFormatPackage(String userAgent) {
        return firstUsableCandidate(APP_STORE_PATTERN.matcher(userAgent));
    }

    /** Heuristic matching: {@code <package>/<version>} tokens first, then bracket content. */
    private static String parseHeuristicPackage(String userAgent) {
        // Mode 1: every "<token>/<version>" pair; the first one is usually "Mozilla/5.0".
        String versioned = firstUsableCandidate(VERSIONED_TOKEN_PATTERN.matcher(userAgent));
        if (!versioned.isEmpty()) {
            return versioned;
        }

        // Mode 2: every part of every parenthesised group.
        Matcher bracketMatcher = BRACKET_CONTENT_PATTERN.matcher(userAgent);
        while (bracketMatcher.find()) {
            for (String part : BRACKET_PART_SEPARATOR.split(bracketMatcher.group(1))) {
                if (isUsablePackage(part)) {
                    return part;
                }
            }
        }
        return "";
    }

    /**
     * Fallback: scans the whole User-Agent for package-shaped tokens and returns the longest
     * usable one (the first one on a tie).
     */
    private static String parseStrictModePackage(String userAgent) {
        Matcher matcher = EMBEDDED_PACKAGE_PATTERN.matcher(userAgent);
        String longest = "";
        while (matcher.find()) {
            String candidate = matcher.group();
            if (candidate.length() > longest.length() && isUsablePackage(candidate)) {
                longest = candidate;
            }
        }
        return longest;
    }

    /**
     * Walks all matches of the given matcher and returns the first capture group 1 (trimmed)
     * that is a usable package name, or an empty string when there is none.
     */
    private static String firstUsableCandidate(Matcher matcher) {
        while (matcher.find()) {
            String candidate = matcher.group(1).trim();
            if (isUsablePackage(candidate)) {
                return candidate;
            }
        }
        return "";
    }

    /** A candidate is usable when it is a valid package name and not a browser. */
    private static boolean isUsablePackage(String candidate) {
        return isValidPackage(candidate) && !isBrowserPackage(candidate);
    }

    /** A stage result is reportable when it is non-blank and not a browser package. */
    private static boolean isReportable(String packageName) {
        return !isBlank(packageName) && !isBrowserPackage(packageName);
    }

    /**
     * Validates a package name: at least 6 characters, dot-separated identifier segments with
     * at least two dots, and a first segment that is not a system word (case-insensitive).
     */
    private static boolean isValidPackage(String packageName) {
        if (packageName == null || packageName.length() < 6) {
            return false;
        }
        if (!STRICT_PACKAGE_PATTERN.matcher(packageName).matches()) {
            return false;
        }
        String firstSegment = packageName.substring(0, packageName.indexOf('.'));
        return !SYSTEM_WORD_BLACKLIST.contains(firstSegment.toLowerCase(Locale.ROOT));
    }

    /** Returns whether the package name belongs to a known browser. */
    private static boolean isBrowserPackage(String packageName) {
        return BROWSER_PACKAGE_SET.contains(packageName);
    }

    /** Returns whether the value is {@code null}, empty, or whitespace only. */
    private static boolean isBlank(String value) {
        return value == null || value.trim().isEmpty();
    }
}