采集JSON解析错误的修复
两段采集来的JSON格式:
一:
{"hwgOnlineId":"554312", "jiwuChatId":"", "phoneCategoryId":"20006", "cuxiaoSeq":{voucherTitle:1,lh
二:
{"pic":"//imgservice.suning.cn/uimg1/b2c/image/rX4cM65dyr_4kndNFwLEgA.jpg_400w_400h_4e","itemDomain":"//"+document.location.hostname,"resRoot":"//script.suning.cn/project/pdsWeb","shopCount":1,"sslStoreCode":"","addCartNumLimit":"199","favoriteStatusSwitch":"1" == "1","thirdEVoucherFlag":"","specialSSLFlag":"","sslLabelText" : "苏宁服务"}
直接使用 PHP 的 json_decode 解析这两段内容会失败(返回 null)
解决思路:
1、手动格式化:在每个逗号(",")处换行,让每个键值对独占一行
2、用二分法逐段删除内容,定位导致报错的那一行
3、找到问题行后过滤格式处理
4、对于json格式中掺杂了javascript代码的情况还可以直接使用v8js引擎处理
解决方案一(修复格式):
// Solution 1: rewrite the scraped text until it is valid JSON, then decode it.
$str = file_get_contents('test.txt');
if ($str === false) {
    // Without the input there is nothing to repair; stop instead of decoding `false`.
    exit("unable to read test.txt\n");
}

// Strip /* ... */ block comments (ungreedy; the comment body may not contain '/').
$str = preg_replace('@/\*[^/]+\*/@isU', '', $str);
// Replace the embedded JavaScript expressions with plain JSON values:
//   "//"+document.location.hostname  ->  "//"
//   "1" == "1"                       ->  true
$str = str_replace('"+document.location.hostname', '"', $str);
$str = str_replace('"1" == "1"', 'true', $str);

$json = json_decode($str);
// Check the decoder's error state rather than the truthiness of the result,
// so that valid but falsy documents (0, "", []) are not treated as failures.
if (json_last_error() !== JSON_ERROR_NONE) {
    // NOTE(review): json2array() is not defined in this snippet; presumably a
    // lenient parser for unquoted keys provided elsewhere -- confirm before use.
    $json = json2array($str);
}

// Dump the decoded data itself. getJsJSON() expects raw text that it splices
// into JavaScript, so passing the decoded object/array to it would fail.
var_dump($json);
解决方案二(使用PHP-V8JS扩展):
/**
 * Evaluate a JavaScript object literal inside V8 and return it as PHP data.
 *
 * The text is spliced into a small script that stubs `document.location.hostname`
 * with an empty string, assigns the literal to a variable and serialises it with
 * JSON.stringify(); the resulting JSON is then decoded with json_decode(..., true).
 *
 * SECURITY: $str is executed as JavaScript. Only feed it content you are
 * prepared to run inside the V8 sandbox.
 *
 * @param string $str JavaScript object literal (e.g. scraped "JSON" containing expressions).
 * @return mixed Decoded data (associative arrays for objects); null when the
 *               script throws or the serialised output cannot be decoded.
 */
function getJsJSON($str)
{
    $v8 = new V8Js();

    // Each JavaScript statement is terminated explicitly so the script stays
    // valid no matter how the surrounding PHP source is wrapped or collapsed.
    $func = "function getJsJSON(str){\n"
          . "document = {location:{hostname:''}};\n"
          . "a = {$str};\n"
          . "return JSON.stringify(a);\n"
          . "}\n"
          . "b = getJsJSON()";

    // Defined up front so the check below is safe when the script throws.
    $code = null;

    try {
        // Pass a parameter to JS (left disabled, as in the original).
        // $v8->str = $str;

        // Run the script; the value of the last expression is returned.
        $code = $v8->executeString($func);

        // Drop the engine object so it can be garbage-collected.
        unset($v8);
    } catch (V8JsScriptException $e) {
        // var_dump() instead of the framework-only dump() helper, so the
        // diagnostics also work in a plain PHP script.
        var_dump($e->getMessage());
        var_dump($e->getJsSourceLine());
        var_dump($e->getJsTrace());
        // var_dump($e);
    }

    if ($code) {
        $code = json_decode($code, true);
    }

    return $code;
}

$js = file_get_contents('test.txt');
var_dump(getJsJSON($js));