Add files via upload

This commit is contained in:
2025-11-09 16:11:29 +01:00
committed by GitHub
commit 2cf0836dd1
15 changed files with 4070 additions and 0 deletions

View File

@@ -0,0 +1,198 @@
from flask import Flask, request, render_template, jsonify, send_file, session
import requests
from bs4 import BeautifulSoup
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
import os
import json
import time
from flask_session import Session
import random
from threading import Thread, Event
app = Flask(__name__)
# WARNING: hard-coded secret key — fine for local use, but replace with a
# random value (e.g. from an environment variable) before any real deployment.
app.config['SECRET_KEY'] = 'supersecretkey'
app.config['SESSION_TYPE'] = 'filesystem'  # server-side sessions stored on disk
Session(app)
# Maps session id -> {'current_item': int, 'total_items': int}; read by the
# /progress endpoint while /search is running.
progress_data = {}
# Proxies that recently passed check_proxy(); refreshed by a background thread.
runningproxies = []
# Signalled to stop the background proxy-refresh thread.
update_event = Event()
def load_proxies():
    """Download public HTTP/SOCKS proxy lists and return them as URL strings.

    Returns a list like ``['http://1.2.3.4:80', 'socks5://...', ...]``.
    Lists that cannot be downloaded are skipped (best effort) instead of
    crashing the whole load.
    """
    urls = {
        "http": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt",
        "socks4": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks4.txt",
        "socks5": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt"
    }
    proxies = []
    for proxy_type, url in urls.items():
        try:
            # timeout added: the original call could hang forever on a
            # slow/dead mirror, and any network error crashed the caller
            response = requests.get(url, timeout=10)
        except requests.RequestException:
            continue  # skip this list, keep the others
        if response.status_code == 200:
            proxies.extend([f"{proxy_type}://{line.strip()}" for line in response.text.splitlines() if line.strip()])
    return proxies
def check_proxy(proxy):
    """Return True if *proxy* ('scheme://host:port') can reach google.com.

    Uses a 1-second timeout so dead proxies fail fast.
    """
    proxy_type = proxy.split("://")[0]
    try:
        response = requests.get("https://www.google.com",
                                proxies={proxy_type: proxy}, timeout=1)
    # narrowed from a bare `except:` which also swallowed KeyboardInterrupt,
    # SystemExit and programming errors
    except requests.RequestException:
        return False
    return response.status_code == 200
def update_running_proxies(proxies):
    """Background worker: keep up to 20 verified proxies in ``runningproxies``.

    Re-validates candidates from *proxies* every 5 minutes until
    ``update_event`` is set.
    """
    global runningproxies
    while not update_event.is_set():
        new_proxies = []
        for proxy in proxies:
            if len(new_proxies) >= 20:  # 20 working proxies are plenty
                break
            if check_proxy(proxy):
                new_proxies.append(proxy)
        runningproxies = new_proxies
        # wait() instead of time.sleep(): it returns immediately when
        # update_event is set, so the thread can be stopped promptly rather
        # than blocking for up to 5 minutes
        update_event.wait(300)  # 5 minutes between refresh rounds
def start_proxy_updater(proxies):
    """Launch the proxy re-checker as a daemon thread (dies with the process)."""
    Thread(target=update_running_proxies, args=(proxies,), daemon=True).start()
def get_random_proxy():
    """Block until a verified proxy is available, then return one at random.

    Polls the shared ``runningproxies`` list once per second; never returns
    until the background updater has produced at least one working proxy.
    """
    while True:
        candidates = runningproxies  # snapshot the shared list once
        if not candidates:
            print("No running proxies available.")  # debug output
            time.sleep(1)  # back off briefly before polling again
            continue
        proxy = random.choice(candidates)
        print(f"Selected proxy: {proxy}")  # debug output
        return proxy
def get_total_pages(query):
    """Return the number of result pages for *query*, defaulting to 1.

    Falls back to 1 whenever the pagination markup is missing or its last
    page link is not numeric.
    """
    url = f"https://www.kleinanzeigen.de/s-suchanfrage.html?keywords={query}&page=1"
    # timeout so the very first request cannot hang the whole search
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')
    pagination = soup.find('div', class_='pagination')
    total_pages = 1
    if pagination:
        pages = pagination.find_all('a')
        # pages[-2] is the last page-number link (the final <a> is "next").
        # The original raised IndexError with a single link and ValueError
        # with non-numeric text; guard both and keep the 1-page default.
        if len(pages) >= 2 and pages[-2].text.strip().isdigit():
            total_pages = int(pages[-2].text)
    return total_pages
def fetch_page(query, page, unwanted_words, minprice, maxprice, unwanted_prices, proxies):
    """Fetch one result page of a Kleinanzeigen search through a rotating proxy.

    Retries with a fresh random proxy until the page loads. Returns a list of
    ``[title, price, link, price_value]`` rows that pass the price range,
    title-blacklist and price-blacklist filters.
    """
    while True:
        proxy = get_random_proxy()
        try:
            proxy_type = proxy.split("://")[0]
            url = f"https://www.kleinanzeigen.de/s-suchanfrage.html?keywords={query}&page={page}"
            # timeout added so a dead proxy cannot hang this worker forever
            response = requests.get(url, proxies={proxy_type: proxy}, timeout=10)
            response.raise_for_status()
            print(f"Successfully fetched page {page} using proxy {proxy}.")  # debug output
            break
        except requests.exceptions.RequestException as e:
            print(f"Failed to fetch page {page} using proxy {proxy}. Error: {e}")  # debug output
            continue
    soup = BeautifulSoup(response.text, 'html.parser')
    items = []
    for item in soup.find_all('li', class_='ad-listitem'):
        title_tag = item.find('a', class_='ellipsis')
        title = title_tag.text.strip().lower() if title_tag else 'n/a'
        price_tag = item.find('p', class_='aditem-main--middle--price-shipping--price')
        price = price_tag.text.strip() if price_tag else 'n/a'
        if price != 'n/a':
            # strip the euro sign, the "VB" (negotiable) marker and commas
            # before parsing. Bug fix: the original `.replace('', '')` was a
            # no-op — the '€' literal had been lost in a mis-encoding.
            price = price.replace('€', '').replace('VB', '').replace(',', '').strip()
            price_value = float(price) if price.isdigit() else 0
            price = f"{price} €"  # price with euro sign for display (restored)
        else:
            price_value = 0
        link = "https://www.kleinanzeigen.de" + title_tag['href'] if title_tag else 'n/a'
        if (minprice <= price_value <= maxprice or 'vb' in price.lower()) and not any(word.lower() in title for word in unwanted_words) and price not in unwanted_prices:
            items.append([title, price, link, price_value])
    return items
def search_item(query, unwanted_words, minprice, maxprice, maxpages):
    """Search Kleinanzeigen for *query* across up to *maxpages* pages.

    Pages are fetched concurrently through rotating proxies. Returns rows
    ``[title, price, link, price_value]`` sorted ascending by price, with
    duplicate links removed (first occurrence wins).
    """
    page_count = min(get_total_pages(query), maxpages)
    proxy_pool = load_proxies()
    start_proxy_updater(proxy_pool)
    rows = []
    with ThreadPoolExecutor(max_workers=10) as executor:
        pending = [
            executor.submit(fetch_page, query, page_no, unwanted_words,
                            minprice, maxprice, ["Zu verschenken"], proxy_pool)
            for page_no in range(1, page_count + 1)
        ]
        for task in pending:
            rows.extend(task.result())
    # sort by the numeric price column before de-duplicating
    rows.sort(key=lambda row: row[3])
    # keep the first (cheapest) occurrence of each link; dict preserves
    # insertion order, so the price ordering survives
    by_link = {}
    for row in rows:
        by_link.setdefault(row[2], row)
    return list(by_link.values())
@app.route('/')
def index():
    """Serve the search form (templates/index.html)."""
    return render_template('index.html')
@app.route('/search', methods=['POST'])
def search():
    """Run the scraper for every submitted query; return all hits as JSON.

    Form fields: ``query`` / ``unwanted_words`` (newline-separated),
    ``minprice`` / ``maxprice`` / ``maxpages`` (numbers), ``filename``
    (Excel output name without extension). Progress is published to
    ``progress_data`` keyed by the session cookie; the resulting .xlsx path
    is stored in the session for /download.
    """
    # default of '' so a missing field yields no queries instead of an
    # AttributeError on None
    queries = request.form.get('query', '').split('\n')
    unwanted_words = request.form.get('unwanted_words', '').split('\n')
    minprice = float(request.form.get('minprice') or 0)
    maxprice = float(request.form.get('maxprice') or float('inf'))
    maxpages = int(request.form.get('maxpages') or 0)  # NOTE: 0 means no pages are searched when the field is left empty
    filename = request.form.get('filename') or 'kleinanzeigen_results'
    # basename() blocks path traversal via a user-supplied filename
    filename = os.path.basename(filename)
    session_id = request.cookies.get('session')
    if session_id not in progress_data:
        progress_data[session_id] = {'current_item': 0, 'total_items': 0}
    all_items = []
    progress_data[session_id]['total_items'] = len(queries)
    for i, query in enumerate(queries):
        query = query.strip()
        if query:
            items = search_item(query, unwanted_words, minprice, maxprice, maxpages)
            all_items.extend(items)
        progress_data[session_id]['current_item'] = i + 1
        time.sleep(0.1)  # brief pause so the /progress endpoint can observe updates
    df = pd.DataFrame(all_items, columns=['Artikel', 'Preis', 'Link', 'PriceValue'])
    df = df.drop(columns=['PriceValue'])
    # bug fix: the original wrote a literal placeholder instead of the
    # user-chosen filename (the `filename` variable was computed but unused)
    filepath = f'uploads/{filename}.xlsx'
    df.to_excel(filepath, index=False)
    session['filepath'] = filepath
    return jsonify(all_items)
@app.route('/progress')
def progress():
    """Return this client's current scrape progress as JSON."""
    sid = request.cookies.get('session')
    fallback = {'current_item': 0, 'total_items': 0}  # nothing started yet
    return jsonify(progress_data.get(sid, fallback))
@app.route('/download')
def download():
    """Send the Excel file produced by the last /search, or 404."""
    path = session.get('filepath')
    # guard clause: no file recorded for this session, or it was removed
    if not path or not os.path.exists(path):
        return "File not found", 404
    return send_file(path, as_attachment=True)
if __name__ == '__main__':
    # exist_ok replaces the manual exists()/makedirs() dance and avoids the
    # race between the check and the creation
    os.makedirs('uploads', exist_ok=True)
    # debug=True is for local development only — do not deploy with it on
    app.run(debug=True)