Arquivos

83 linhas
2.4 KiB
Python

import flask
import requests
from flask import request
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin
# Flask application object; the route decorators in this file register their
# handlers on it.
app = flask.Flask(__name__)
# Headers sent with the upstream page fetch. The User-Agent string identifies
# the client as Googlebot (mobile Chrome on Android).
googlebot_headers = {
"User-Agent": "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.6533.119 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
}
def add_base_tag(html_content, original_url):
    """Return *html_content* with a ``<base>`` tag for the page's directory.

    The page is re-served from this application, so relative links inside
    it would otherwise resolve against this server. A ``<base href>``
    pointing at the directory of ``original_url`` keeps them pointing at
    the original site.

    Args:
        html_content: HTML markup of the fetched page.
        original_url: Absolute URL the markup was fetched from.

    Returns:
        The document serialized by BeautifulSoup. If the document already
        contains a ``<base>`` tag, no second one is added.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    if soup.find('base') is not None:
        # The page declares its own base; leave it in charge.
        return str(soup)

    parsed_url = urlparse(original_url)
    path = parsed_url.path
    # Directory part of the path: everything up to and including the last
    # "/". This covers both ".../dir/page.html" and ".../dir/" (the latter
    # must keep its full path, not fall back to the site root). An empty
    # path maps to "/".
    directory = path[:path.rfind('/') + 1] or '/'
    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}{directory}"

    new_base_tag = soup.new_tag('base', href=base_url)
    if soup.head is not None:
        soup.head.insert(0, new_base_tag)
    else:
        head_tag = soup.new_tag('head')
        head_tag.insert(0, new_base_tag)
        # Put the new <head> inside <html> when there is one, so it does not
        # end up in front of the doctype; fall back to the document root for
        # bare fragments.
        parent = soup.html if soup.html is not None else soup
        parent.insert(0, head_tag)
    return str(soup)
def bypass_paywall(url, timeout=15):
    """Fetch *url* with the Googlebot headers and return the rewritten HTML.

    Args:
        url: Address of the article. When it has no ``http://`` or
            ``https://`` prefix, ``https://`` is tried first and
            ``http://`` is used as a fallback.
        timeout: Seconds to wait for the upstream server before giving up
            (passed to ``requests.get``).

    Returns:
        The page markup after ``add_base_tag`` has been applied to it.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (including timeouts).
    """
    # NOTE(security): `url` is client-controlled, so this issues a
    # server-side request to an arbitrary host. Restrict the reachable
    # hosts at the network level if this runs next to internal services.
    if url.lower().startswith(("http://", "https://")):
        # Without a timeout a stalled upstream would block this worker
        # indefinitely.
        response = requests.get(url, headers=googlebot_headers, timeout=timeout)
        # Use the encoding detected from the body instead of the header
        # default.
        response.encoding = response.apparent_encoding
        return add_base_tag(response.text, response.url)

    # No scheme given: the prefixed URL always matches the check above, so
    # each recursive call goes one level deep only.
    try:
        return bypass_paywall("https://" + url, timeout=timeout)
    except requests.exceptions.RequestException:
        return bypass_paywall("http://" + url, timeout=timeout)
@app.route("/")
def main_page():
    """Serve ``index.html`` as the landing page."""
    directory, filename = ".", "index.html"
    return flask.send_from_directory(directory, filename)
@app.route("/article", methods=["POST"])
def show_article():
    """Handle the form submission: fetch the URL posted in the ``link`` field.

    Returns:
        The rewritten article HTML on success, or a 400 response when the
        field is missing or blank or the upstream request fails.
    """
    link = flask.request.form.get("link", "").strip()
    if not link:
        return "Invalid URL", 400
    try:
        return bypass_paywall(link)
    except requests.exceptions.RequestException as e:
        # The exception text can echo the client-supplied URL; serve it as
        # plain text so it is never interpreted as markup by the browser.
        return str(e), 400, {"Content-Type": "text/plain; charset=utf-8"}
@app.route("/", defaults={"path": ""})
@app.route("/<path:path>", methods=["GET"])
def get_article(path):
    """Fetch the article whose URL is appended to this server's address.

    Args:
        path: Path captured by the route. It is not used directly; the
            target is taken from the full request URL instead.

    Returns:
        The rewritten article HTML on success, or a 400 response when no
        target can be extracted or the upstream request fails.
    """
    # Split "scheme://host/<segment>/<rest>" into at most five pieces; the
    # target is the fifth one, i.e. whatever follows the first path segment.
    # NOTE(review): this looks designed for "/https://site/page" style
    # requests, where that first segment is the target's own scheme --
    # confirm against how the service is linked to.
    parts = request.url.split("/", 4)
    if len(parts) < 5:
        return "Invalid URL", 400
    target = parts[4].lstrip("/")
    if not target:
        return "Invalid URL", 400
    # The target is always requested over https.
    actual_url = "https://" + target
    try:
        return bypass_paywall(actual_url)
    except requests.exceptions.RequestException as e:
        # The exception text can echo the client-supplied URL; serve it as
        # plain text so it is never interpreted as markup by the browser.
        return str(e), 400, {"Content-Type": "text/plain; charset=utf-8"}
    # Any other exception propagates unchanged to Flask's error handling.
# Start the built-in server only when this file is executed as a script, so
# that importing the module (for example from a WSGI server) does not block.
if __name__ == "__main__":
    app.run(debug=False)