Add production-ready scraper with README
This commit is contained in:
52
app/main.py
Normal file
52
app/main.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import httpx
|
||||
from fastapi import FastAPI, HTTPException
|
||||
|
||||
# ASGI application object; the route decorators below register on it.
app = FastAPI(title="GISP Scraper API (API-Direct)")

# Backend endpoint of the GISP product registry, identified by inspecting
# the site's network traffic. Search requests are POSTed here as JSON.
API_URL = "https://gisp.gov.ru/pp719v2/pub/prod/b/"
|
||||
|
||||
async def fetch_gisp_data(registry_number: str):
    """Query the GISP registry API for products matching a registry number.

    Sends a POST request to ``API_URL`` with a "contains" filter on the
    ``product_reg_number_2023`` field and asks for at most 10 rows.

    Args:
        registry_number: Registry number (or a fragment of it) to search for.

    Returns:
        The non-empty list stored under the response's ``"data"`` key, or
        ``None`` when nothing matched, the HTTP request failed, the body was
        not valid JSON, or the JSON did not have the expected shape.
    """
    # Function-scope import so this block does not depend on a file-level one.
    import logging

    logger = logging.getLogger(__name__)

    # Filter payload modelled on the request observed in the browser's
    # Network tab. The restrictive date filters (res_valid_till, etc.) are
    # intentionally left out so the search is not narrowed by date.
    payload = {
        "opt": {
            "sort": None,
            "requireTotalCount": True,
            "searchOperation": "contains",
            "searchValue": None,
            "skip": 0,
            "take": 10,
            "userData": {},
            "filter": ["product_reg_number_2023", "contains", registry_number],
        }
    }

    async with httpx.AsyncClient() as client:
        try:
            response = await client.post(API_URL, json=payload, timeout=30.0)
            response.raise_for_status()
            data = response.json()
        except (httpx.HTTPError, ValueError) as exc:
            # httpx.HTTPError covers transport failures and non-2xx statuses;
            # ValueError covers a body that is not valid JSON. Anything else
            # is a programming error and is allowed to propagate.
            logger.warning("API scraping error: %s", exc)
            return None

    # GISP usually returns { "data": [ ... ], "totalCount": N }; treat any
    # other shape as "nothing found" instead of relying on an exception.
    rows = data.get("data") if isinstance(data, dict) else None
    if isinstance(rows, list) and rows:
        # The raw rows are returned as-is; based on the DevExtreme schema a
        # specific ID may be needed later to build a product URL.
        return rows
    return None
|
||||
|
||||
@app.get("/scrape/{registry_number:path}")
async def get_product_link(registry_number: str):
    """Look up a registry number and return the matching registry rows.

    The ``:path`` converter lets the registry number contain ``/``
    characters. A falsy lookup result (no match or a failed lookup) is
    reported as HTTP 404.
    """
    matches = await fetch_gisp_data(registry_number)

    if matches:
        # The whole result list is returned for now; this can later be
        # narrowed down to a specific URL.
        return {"registry_number": registry_number, "results": matches}

    raise HTTPException(status_code=404, detail="Product not found or scraping failed")
|
||||
|
||||
@app.get("/health")
def health():
    """Health-check endpoint: always answers with a static OK payload."""
    status_payload = {"status": "ok"}
    return status_payload
|
||||
Reference in New Issue
Block a user