Add production-ready scraper with README

This commit is contained in:
Flash
2026-04-10 15:29:44 +00:00
commit 75f51121ea
1882 changed files with 350270 additions and 0 deletions

52
app/main.py Normal file
View File

@@ -0,0 +1,52 @@
import logging

import httpx
from fastapi import FastAPI, HTTPException
# FastAPI application object; route handlers below are registered on it.
app = FastAPI(title="GISP Scraper API (API-Direct)")
# The API endpoint identified from network inspection
# (public GISP product registry search endpoint, POST with a DevExtreme-style payload).
API_URL = "https://gisp.gov.ru/pp719v2/pub/prod/b/"
async def fetch_gisp_data(registry_number: str):
    """Query the GISP public product API for entries matching *registry_number*.

    Sends the DevExtreme-style filter payload observed in browser network
    inspection; the restrictive date filters (res_valid_till, etc.) are
    deliberately omitted.

    Args:
        registry_number: Registry number matched with a "contains" filter
            against the ``product_reg_number_2023`` field.

    Returns:
        The non-empty list of matching entries from the response's ``data``
        key, or ``None`` when nothing matched or the request failed
        (best-effort contract: callers treat ``None`` as "not found").
    """
    # Constructing the filter payload based on what we saw in the Network tab
    payload = {
        "opt": {
            "sort": None,
            "requireTotalCount": True,
            "searchOperation": "contains",
            "searchValue": None,
            "skip": 0,
            "take": 10,
            "userData": {},
            "filter": ["product_reg_number_2023", "contains", registry_number],
        }
    }
    async with httpx.AsyncClient() as client:
        try:
            response = await client.post(API_URL, json=payload, timeout=30.0)
            response.raise_for_status()
            data = response.json()
        except httpx.HTTPError as exc:
            # Narrow catch: network/timeout/HTTP-status failures only, so
            # genuine programming errors are no longer silently swallowed.
            logging.getLogger(__name__).error("GISP API request failed: %s", exc)
            return None
        except ValueError as exc:
            # response.json() raises ValueError when the body is not JSON.
            logging.getLogger(__name__).error("GISP API returned invalid JSON: %s", exc)
            return None
    # GISP usually returns { "data": [ ... ], "totalCount": N }
    if data.get("data"):
        # Return the matching entries; based on the DevExtreme schema we may
        # need a specific ID from them to form the product URL later.
        return data["data"]
    return None
@app.get("/scrape/{registry_number:path}")
async def get_product_link(registry_number: str):
    """Look up *registry_number* on GISP and return the raw matching entries.

    Raises:
        HTTPException: 404 when no entries were found or the upstream
            request failed.
    """
    matches = await fetch_gisp_data(registry_number)
    if matches:
        # We can refine this to return the specific URL or the whole result object
        return {"registry_number": registry_number, "results": matches}
    raise HTTPException(status_code=404, detail="Product not found or scraping failed")
@app.get("/health")
def health():
    """Liveness probe: reports the service as up."""
    status_payload = {"status": "ok"}
    return status_payload