import json
from typing import Optional, Tuple
from urllib.parse import parse_qs, unquote, urlparse


def extract_ids(url: str) -> Tuple[Optional[str], Optional[str]]:
    """Extract the product ID and SKU ID from a product-page URL.

    The product ID is the last path segment with everything from the first
    ``.`` onward removed (``/item/123.html`` -> ``"123"``). The SKU ID is the
    ``sku_id`` entry of the JSON object carried in the ``pdp_ext_f`` query
    parameter.

    Args:
        url: The URL to inspect.

    Returns:
        A ``(product_id, sku_id)`` tuple. ``product_id`` is ``None`` when the
        last path segment is empty. ``sku_id`` is ``None`` when ``pdp_ext_f``
        is absent, is not valid JSON, is not a JSON object, or has no
        ``sku_id`` key.
    """
    parsed = urlparse(url)

    # str.split always yields at least one element, so an emptiness check on
    # the list can never fire; normalise an empty last segment to None.
    last_segment = parsed.path.split('/')[-1]
    product_id = last_segment.split('.')[0] or None

    # parse_qs already percent-decodes the values it returns.
    pdp_ext_f = parse_qs(parsed.query).get('pdp_ext_f', [None])[0]

    sku_id = None
    if pdp_ext_f:
        # Try the once-decoded value first: decoding it again unconditionally
        # would corrupt a payload containing a literal "%XX" sequence. Fall
        # back to a second decode only if the first form is not valid JSON,
        # which keeps double-encoded parameters working.
        for candidate in (pdp_ext_f, unquote(pdp_ext_f)):
            try:
                data = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            # Valid JSON need not be an object (e.g. a number or a list).
            if isinstance(data, dict):
                sku_id = data.get('sku_id')
            break

    return product_id, sku_id


# Example usage
url = "https://es.aliexpress.com/item/1005007507612726.html?spm=a2g0o.categorymp.prodcutlist.4.2c194Yk64Yk6EC&pdp_ext_f=%7B%22sku_id%22%3A%2212000041066405249%22%7D&utparam-url=scene%3Asearch%7Cquery_from%3Acategory_navigate_newTab2%7Cx_object_id%3A1005007507612726%7C_p_origin_prod%3A"
product_id, sku_id = extract_ids(url)
print("Product ID:", product_id)  # Output: 1005007507612726
print("SKU ID:", sku_id)  # Output: 12000041066405249