x搜索新增了x-client-transaction-id的验证
之前公司电脑不能随便安装软件也就没法写了,最近被裁员了,开始学习,又有时间了。记录下学习的过程。
x的x-client-transaction-id这个参数一直都有,只是没有启用。5月1日前后,这个参数在检索模块突然启用了,有些检索抓取的程序就跑不起来了。但是访问主页账户信息和帖子时还没有启用。
用谷歌搜索x-client-transaction-id,能找到很多相关的内容和帖子,也有现成的包,但是很多人看不懂具体参数是啥、如何使用。我就研究并测试了一下。
第一步,参考https://github.com/langkor/x-client-transaction
这个库就有相关的说明,只是不是python。翻译下就可以看懂了。
第二步,查找python包https://github.com/iSarabjitDhiman/XClientTransaction
这个就可以了,用pip就可以安装。
pip install XClientTransaction -U --no-cache-dir
def handle_x_migration(session):
    """Fetch the x.com home page, following the migration flow if one is present.

    NOTE(review): this snippet was reconstructed from a paste that had been
    collapsed onto a single line; the two ``if`` branches are assumed to be
    sequential (not nested) -- confirm against the upstream utils module.

    Args:
        session: An HTTP session exposing ``request(method=..., url=...)``,
            e.g. ``requests.Session()`` with ``session.headers`` set to
            ``generate_headers()``.

    Returns:
        The final home page parsed as a ``bs4.BeautifulSoup`` object.
    """
    # Send the request and parse the response with bs4.
    response = session.request(method="GET", url="https://x.com")
    home_page = bs4.BeautifulSoup(response.content, 'html.parser')

    # If the page points at a migration redirect, follow it and re-parse.
    migration_redirection_url = get_migration_url(response=home_page)
    if migration_redirection_url:
        response = session.request(
            method="GET", url=migration_redirection_url.group(0))
        home_page = bs4.BeautifulSoup(response.content, 'html.parser')

    # If a migration form is present, submit it; get_migration_form is
    # expected to return keyword arguments for session.request().
    migration_form = get_migration_form(response=home_page)
    if migration_form:
        response = session.request(**migration_form)
        home_page = bs4.BeautifulSoup(response.content, 'html.parser')

    return home_page


def get_ondemand_file_url(response: bs4.BeautifulSoup):
    """Return the URL of the on-demand file referenced by the home page.

    Args:
        response: The parsed home page.

    Returns:
        ``ON_DEMAND_FILE_URL`` formatted with the filename captured by
        ``ON_DEMAND_FILE_REGEX``, or ``None`` when the pattern does not match.
    """
    file_url = None
    on_demand_file = ON_DEMAND_FILE_REGEX.search(str(response))
    if on_demand_file:
        filename = on_demand_file.group(1)
        file_url = ON_DEMAND_FILE_URL.format(filename=filename)
    return file_url
上面是工具类。
from urllib.parse import urlparse

import bs4
import requests
from x_client_transaction import ClientTransaction
from x_client_transaction.utils import generate_headers, handle_x_migration, get_ondemand_file_url

# Initialize the HTTP session used for every request below.
session = requests.Session()
session.headers = generate_headers()

# Get the home page as a bs4.BeautifulSoup object.
# handle_x_migration() requests https://x.com itself and follows the
# migration flow when one is present, so a second plain
# session.get("https://x.com") is not needed.
home_page_response = handle_x_migration(session=session)

# Locate and download the on-demand file referenced by the home page.
ondemand_file_url = get_ondemand_file_url(response=home_page_response)
if ondemand_file_url is None:
    # Fail with a clear message instead of passing None to session.get().
    raise RuntimeError("could not find the on-demand file URL in the x.com home page")
ondemand_file = session.get(url=ondemand_file_url)
ondemand_file_response = bs4.BeautifulSoup(ondemand_file.content, 'html.parser')

# Example 1
# Replace the url and http method as per your use case.
url = "https://x.com/i/api/1.1/jot/client_event.json"
method = "POST"
path = urlparse(url=url).path
# path will be /i/api/1.1/jot/client_event.json in this case

# Example 2
# The variable names follow the upstream example, but the URL here is the
# SearchTimeline endpoint.
user_by_screen_name_url = 'https://x.com/i/api/graphql/yiE17ccAAu3qwM34bPYZkQ/SearchTimeline?variables={"rawQuery":"nba","count":20,"querySource":"recent_search_click","product":"Top"}&features={"rweb_video_screen_enabled":false,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":true,"responsive_web_jetfuel_frame":false,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"responsive_web_grok_show_grok_translated_post":false,"responsive_web_grok_analysis_button_from_backend":false,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_grok_image_annotation_enabled":true,"responsive_web_enhance_cards_enabled":false}'
user_by_screen_name_http_method = "GET"
# Only the URL path (without the query string) is used for the transaction id.
user_by_screen_name_path = urlparse(url=user_by_screen_name_url).path
print(user_by_screen_name_path)
# path will be /i/api/graphql/yiE17ccAAu3qwM34bPYZkQ/SearchTimeline in this case

# Build the generator from the two parsed responses.
ct = ClientTransaction(home_page_response=home_page_response, ondemand_file_response=ondemand_file_response)

# Pass the HTTP method and the URL path of the request being signed.
# Author's note: the check is probably not enabled yet for the Example 1 endpoint.
transaction_id = ct.generate_transaction_id(method=method, path=path)
transaction_id_for_user_by_screen_name_endpoint = ct.generate_transaction_id(method=user_by_screen_name_http_method, path=user_by_screen_name_path)

print(transaction_id)
print(transaction_id_for_user_by_screen_name_endpoint)
以上就是最终实现代码。其实也可以自己一点点逆向分析,但是用现成的更好、更省事。