from flask import Flask, request, render_template, jsonify, send_file, session
import requests
from bs4 import BeautifulSoup
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
import os
import time
from flask_session import Session
import random
from threading import Thread, Event, Lock
import secrets
from urllib.parse import quote_plus
from werkzeug.utils import secure_filename

app = Flask(__name__)
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', secrets.token_hex(32))
app.config['SESSION_TYPE'] = 'filesystem'
Session(app)

# Proxies that recently passed a liveness check, maintained by a background
# updater thread. Guarded by runningproxies_lock.
runningproxies = []
runningproxies_lock = Lock()
update_event = Event()  # set() signals the updater thread to stop
updater_thread = None

# Cache of the raw downloaded proxy lists so repeated searches do not
# re-download them from GitHub.
_proxy_cache = {"proxies": [], "expires_at": 0}
_proxy_cache_lock = Lock()
PROXY_CACHE_TTL = 600  # seconds


def load_proxies():
    """Download (or serve from cache) public HTTP/SOCKS proxy lists.

    Returns:
        list[str]: "scheme://host:port" entries; empty when every download fails.
    """
    current_time = time.time()
    with _proxy_cache_lock:
        if _proxy_cache["proxies"] and current_time < _proxy_cache["expires_at"]:
            return list(_proxy_cache["proxies"])

    urls = {
        "http": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt",
        "socks4": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks4.txt",
        "socks5": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt",
    }
    proxies = []
    for proxy_type, url in urls.items():
        try:
            response = requests.get(url, timeout=5)
            response.raise_for_status()
        except requests.RequestException:
            continue  # best effort: skip lists that fail to download
        proxies.extend(
            f"{proxy_type}://{line.strip()}"
            for line in response.text.splitlines()
            if line.strip()
        )

    with _proxy_cache_lock:
        _proxy_cache["proxies"] = proxies
        _proxy_cache["expires_at"] = current_time + PROXY_CACHE_TTL
    return proxies


def check_proxy(proxy):
    """Return True when *proxy* can fetch an https page within 5 seconds."""
    try:
        # BUG FIX: requests selects a proxy by the *request* URL's scheme,
        # so a dict keyed only by the proxy's own scheme (http/socks4/socks5)
        # was never consulted for an https:// request — the check silently
        # ran without any proxy. Route both schemes through the candidate.
        response = requests.get(
            "https://www.google.com",
            proxies={"http": proxy, "https": proxy},
            timeout=5,
        )
        return response.status_code == 200
    except requests.RequestException:
        return False


def update_running_proxies(proxies):
    """Background loop: keep up to 20 verified proxies in `runningproxies`.

    Re-validates the candidate list every 5 minutes until `update_event`
    is set by stop_proxy_updater().
    """
    global runningproxies
    while not update_event.is_set():
        new_proxies = []
        for proxy in proxies:
            if len(new_proxies) >= 20:
                break
            if check_proxy(proxy):
                new_proxies.append(proxy)
        with runningproxies_lock:
            runningproxies = new_proxies
        if update_event.wait(300):
            break


def start_proxy_updater(proxies):
    """(Re)start the daemon thread that maintains the live-proxy pool."""
    global updater_thread
    stop_proxy_updater()
    update_event.clear()
    updater_thread = Thread(target=update_running_proxies, args=(proxies,))
    updater_thread.daemon = True
    updater_thread.start()


def stop_proxy_updater():
    """Signal the updater thread to stop and wait briefly for it to exit."""
    global updater_thread
    update_event.set()
    if updater_thread and updater_thread.is_alive():
        updater_thread.join(timeout=1)
    updater_thread = None


def get_random_proxy(timeout=10):
    """Return a random verified proxy, waiting up to *timeout* seconds.

    Raises:
        RuntimeError: when no proxy becomes available before the deadline.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        with runningproxies_lock:
            available = list(runningproxies)
        if available:
            return random.choice(available)
        if update_event.wait(1):
            break  # updater was stopped; no proxies are coming
    raise RuntimeError("No running proxies available")


def get_total_pages(query):
    """Return the number of result pages for *query* (1 on any failure)."""
    # BUG FIX: the query is user input and must be URL-encoded.
    url = f"https://www.kleinanzeigen.de/s-suchanfrage.html?keywords={quote_plus(query)}&page=1"
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        return 1
    soup = BeautifulSoup(response.text, 'html.parser')
    pagination = soup.find('div', class_='pagination')
    if not pagination:
        return 1
    pages = pagination.find_all('a')
    try:
        # BUG FIX: int() on scraped text could raise ValueError and 500 the
        # whole request; fall back to a single page instead.
        return int(pages[-2].text) if pages else 1
    except (ValueError, IndexError):
        return 1


def fetch_page(query, page, unwanted_words, minprice, maxprice, unwanted_prices, use_proxies):
    """Fetch one result page and return matching [title, price, link, value] rows.

    Retries up to 5 times, optionally through random live proxies, and falls
    back to direct requests when no proxy is available. Returns an empty list
    when every attempt fails.
    """
    url = f"https://www.kleinanzeigen.de/s-suchanfrage.html?keywords={quote_plus(query)}&page={page}"
    attempts = 0
    max_attempts = 5
    last_error = None
    while attempts < max_attempts:
        proxies = None
        if use_proxies:
            try:
                proxy = get_random_proxy()
                # BUG FIX: key by the request scheme, not the proxy scheme,
                # otherwise the proxy was ignored for https URLs.
                proxies = {"http": proxy, "https": proxy}
            except RuntimeError as exc:
                # No live proxy materialised in time; go direct from now on.
                use_proxies = False
                last_error = exc
                continue
        try:
            response = requests.get(url, proxies=proxies, timeout=10)
            response.raise_for_status()
            break
        except requests.RequestException as exc:
            last_error = exc
            attempts += 1
            time.sleep(1)
    else:
        print(f"Failed to fetch page {page}: {last_error}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    items = []
    for item in soup.find_all('li', class_='ad-listitem'):
        title_tag = item.find('a', class_='ellipsis')
        title = title_tag.text.strip().lower() if title_tag else 'n/a'
        price_tag = item.find('p', class_='aditem-main--middle--price-shipping--price')
        raw_price = price_tag.text.strip() if price_tag else 'n/a'

        # BUG FIX: unwanted_prices (e.g. "Zu verschenken") was previously
        # compared against the reformatted "... €" string and never matched;
        # compare the raw scraped text instead.
        if raw_price in unwanted_prices:
            continue

        if raw_price != 'n/a':
            # BUG FIX: also drop the German thousands separator '.' so that
            # "1.234 €" parses as 1234 instead of silently becoming 0.
            cleaned = (raw_price.replace('€', '').replace('VB', '')
                       .replace('.', '').replace(',', '').strip())
            price_value = float(cleaned) if cleaned.isdigit() else 0
            price = f"{cleaned} €"  # price with euro sign
        else:
            price_value = 0
            price = raw_price

        link = "https://www.kleinanzeigen.de" + title_tag['href'] if title_tag else 'n/a'

        # BUG FIX: the old code tested 'vb' against the cleaned price *after*
        # stripping 'VB', so negotiable ("VB") listings were never recognised;
        # test the raw text.
        in_range = minprice <= price_value <= maxprice or 'vb' in raw_price.lower()
        if in_range and not any(word.lower() in title for word in unwanted_words):
            items.append([title, price, link, price_value])
    return items


def search_item(query, unwanted_words, minprice, maxprice, maxpages):
    """Search up to *maxpages* result pages for *query*.

    Returns deduplicated [title, price, link, price_value] rows sorted by
    numeric price. Free items ("Zu verschenken") are always excluded.
    """
    total_pages = get_total_pages(query)
    pages_to_search = min(total_pages, maxpages)

    proxies = load_proxies()
    use_proxies = bool(proxies)
    if use_proxies:
        start_proxy_updater(proxies)

    items = []
    try:
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [
                executor.submit(
                    fetch_page,
                    query,
                    page,
                    unwanted_words,
                    minprice,
                    maxprice,
                    ["Zu verschenken"],
                    use_proxies,
                )
                for page in range(1, pages_to_search + 1)
            ]
            for future in futures:
                items.extend(future.result())
    finally:
        if use_proxies:
            stop_proxy_updater()

    # Sort results by numeric price.
    items.sort(key=lambda x: x[3])

    # Remove duplicate entries based on the link.
    unique_items = []
    seen_links = set()
    for item in items:
        if item[2] not in seen_links:
            unique_items.append(item)
            seen_links.add(item[2])
    return unique_items


@app.route('/')
def index():
    return render_template('index.html')


@app.route('/search', methods=['POST'])
def search():
    """Run the searches posted by the form and write the results to Excel."""
    # BUG FIX: .get() returns None for missing fields; default to '' so
    # .split() cannot raise AttributeError.
    queries = request.form.get('query', '').split('\n')
    unwanted_words = request.form.get('unwanted_words', '').split('\n')
    minprice = float(request.form.get('minprice') or 0)
    maxprice = float(request.form.get('maxprice') or float('inf'))
    # NOTE(review): a missing maxpages yields 0, i.e. no pages are searched —
    # confirm this default is intended.
    maxpages = int(request.form.get('maxpages') or 0)

    filename = request.form.get('filename') or 'kleinanzeigen_results'
    filename = secure_filename(filename)
    if not filename:
        filename = 'kleinanzeigen_results'
    if not filename.lower().endswith('.xlsx'):
        # BUG FIX: the extension was appended to a hard-coded placeholder
        # string instead of the user-supplied name, discarding it.
        filename = f"{filename}.xlsx"

    session_progress = session.setdefault('progress', {'current_item': 0, 'total_items': 0})
    all_items = []
    session_progress['total_items'] = len([q for q in queries if q.strip()])

    processed_queries = 0
    for query in queries:
        query = query.strip()
        if query:
            items = search_item(query, unwanted_words, minprice, maxprice, maxpages)
            all_items.extend(items)
            processed_queries += 1
            session_progress['current_item'] = processed_queries
            session['progress'] = session_progress
            session.modified = True
            # NOTE(review): the session is only persisted when this request
            # finishes, so /progress may not observe these intermediate
            # updates — confirm against the session backend's behaviour.
            time.sleep(0.1)  # brief pause so the progress display can refresh

    df = pd.DataFrame(all_items, columns=['Artikel', 'Preis', 'Link', 'PriceValue'])
    df = df.drop(columns=['PriceValue'])  # internal sort key, not exported
    os.makedirs('uploads', exist_ok=True)
    filepath = os.path.join('uploads', filename)
    df.to_excel(filepath, index=False)

    session['filepath'] = filepath
    session.modified = True
    return jsonify(all_items)


@app.route('/progress')
def progress():
    """Report how many of the posted queries have been processed so far."""
    return jsonify(session.get('progress', {'current_item': 0, 'total_items': 0}))


@app.route('/download')
def download():
    """Send the Excel file most recently generated for this session."""
    filepath = session.get('filepath')
    if filepath and os.path.exists(filepath):
        return send_file(filepath, as_attachment=True)
    return "File not found", 404


if __name__ == '__main__':
    os.makedirs('uploads', exist_ok=True)
    app.run()