"""Flask app that scrapes kleinanzeigen.de search results through rotating
free proxies, filters them by price/keywords, and exports them to Excel.

NOTE(review): the free proxy lists used here are public and unreliable;
scraping kleinanzeigen.de may violate its terms of service — confirm before
deploying.
"""

from flask import Flask, request, render_template, jsonify, send_file, session
import requests
from bs4 import BeautifulSoup
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
import os
import re
import json
import time
from flask_session import Session
import random
from threading import Thread, Event

app = Flask(__name__)
# Hard-coded secret keys are unsafe in production; allow an environment
# override while keeping the previous value as a development fallback.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', 'supersecretkey')
app.config['SESSION_TYPE'] = 'filesystem'
Session(app)

# Per-session scrape progress: {session_cookie: {'current_item': i, 'total_items': n}}
progress_data = {}
# Proxies that recently passed a health check; refreshed by a background thread.
# NOTE(review): read/written from multiple threads without a lock — list
# rebinding is atomic in CPython, but confirm this is acceptable.
runningproxies = []
# Set this event to stop the background proxy updater loop.
update_event = Event()

# How often (seconds) the background thread re-validates the proxy pool.
PROXY_REFRESH_INTERVAL = 300
# How many healthy proxies to keep on hand.
MAX_RUNNING_PROXIES = 20
# Give up fetching a page after this many failed proxy attempts
# (the original retried forever, which could hang a worker indefinitely).
MAX_FETCH_ATTEMPTS = 30


def load_proxies():
    """Download public HTTP/SOCKS proxy lists.

    Returns:
        list[str]: proxies as '<type>://host:port' strings; entries from any
        list that fails to download are simply omitted.
    """
    urls = {
        "http": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt",
        "socks4": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks4.txt",
        "socks5": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt",
    }
    proxies = []
    for proxy_type, url in urls.items():
        response = requests.get(url)
        if response.status_code == 200:
            proxies.extend(
                f"{proxy_type}://{line.strip()}"
                for line in response.text.splitlines()
                if line.strip()
            )
    return proxies


def check_proxy(proxy):
    """Return True if *proxy* can fetch google.com within one second."""
    try:
        proxy_type = proxy.split("://")[0]
        response = requests.get(
            "https://www.google.com", proxies={proxy_type: proxy}, timeout=1
        )
        return response.status_code == 200
    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit and
    # programming errors are no longer silently swallowed.
    except requests.exceptions.RequestException:
        return False


def update_running_proxies(proxies):
    """Background loop: keep `runningproxies` stocked with healthy proxies.

    Re-validates candidates every PROXY_REFRESH_INTERVAL seconds until
    `update_event` is set.
    """
    global runningproxies
    while not update_event.is_set():
        new_proxies = []
        for proxy in proxies:
            if len(new_proxies) >= MAX_RUNNING_PROXIES:
                break
            if check_proxy(proxy):
                new_proxies.append(proxy)
        runningproxies = new_proxies
        time.sleep(PROXY_REFRESH_INTERVAL)


def start_proxy_updater(proxies):
    """Launch `update_running_proxies` on a daemon thread."""
    updater_thread = Thread(target=update_running_proxies, args=(proxies,))
    updater_thread.daemon = True
    updater_thread.start()


def get_random_proxy():
    """Block until a healthy proxy is available, then return one at random.

    NOTE(review): this waits indefinitely if the updater never finds a
    working proxy — consider a timeout if that becomes a problem.
    """
    while True:
        if runningproxies:
            proxy = random.choice(runningproxies)
            print(f"Selected proxy: {proxy}")  # debug output
            return proxy
        print("No running proxies available.")  # debug output
        time.sleep(1)  # brief pause before checking again


def get_total_pages(query):
    """Return the number of result pages for *query* (>= 1).

    Falls back to 1 when the pagination block is missing, too short, or its
    label is not numeric (the original indexed pages[-2] unguarded, which
    raised IndexError/ValueError on such pages).
    """
    response = requests.get(
        "https://www.kleinanzeigen.de/s-suchanfrage.html",
        params={"keywords": query, "page": 1},  # params= percent-encodes the query
    )
    soup = BeautifulSoup(response.text, 'html.parser')
    pagination = soup.find('div', class_='pagination')
    total_pages = 1
    if pagination:
        pages = pagination.find_all('a')
        if len(pages) >= 2:
            try:
                total_pages = int(pages[-2].text)
            except ValueError:
                total_pages = 1
    return total_pages


def _parse_price(raw_price):
    """Parse a German-format price string like '1.234,56 € VB'.

    Returns:
        tuple[str, float]: (display string ending in ' €', numeric value).
        Non-numeric prices (e.g. 'Zu verschenken') yield a value of 0.

    The original stripped commas outright ('12,50' -> '1250') and used
    str.isdigit(), so any price with a separator silently became 0.
    """
    cleaned = raw_price.replace('€', '').replace('VB', '').strip()
    # German format: '.' is the thousands separator, ',' the decimal point.
    normalized = cleaned.replace('.', '').replace(',', '.')
    try:
        value = float(normalized)
    except ValueError:
        value = 0
    return f"{cleaned} €", value


def fetch_page(query, page, unwanted_words, minprice, maxprice, unwanted_prices, proxies):
    """Fetch one result page through a random proxy and extract listings.

    Args:
        query: search keywords.
        page: 1-based result page number.
        unwanted_words: titles containing any of these (case-insensitive) are skipped.
        minprice/maxprice: inclusive price range filter.
        unwanted_prices: raw price strings (e.g. 'Zu verschenken') to exclude.
        proxies: unused here; kept for interface compatibility with callers.

    Returns:
        list[list]: [title, price_display, link, price_value] per listing;
        empty list if the page could not be fetched after MAX_FETCH_ATTEMPTS.
    """
    url = f"https://www.kleinanzeigen.de/s-suchanfrage.html?keywords={query}&page={page}"
    response = None
    for _ in range(MAX_FETCH_ATTEMPTS):
        proxy = get_random_proxy()
        proxy_type = proxy.split("://")[0]
        try:
            attempt = requests.get(url, proxies={proxy_type: proxy})
            attempt.raise_for_status()
            response = attempt
            print(f"Successfully fetched page {page} using proxy {proxy}.")  # debug
            break
        except requests.exceptions.RequestException as e:
            print(f"Failed to fetch page {page} using proxy {proxy}. Error: {e}")  # debug
    if response is None:
        # Bounded retries instead of the original infinite loop.
        print(f"Giving up on page {page} after {MAX_FETCH_ATTEMPTS} attempts.")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    items = []
    for item in soup.find_all('li', class_='ad-listitem'):
        title_tag = item.find('a', class_='ellipsis')
        title = title_tag.text.strip().lower() if title_tag else 'n/a'
        price_tag = item.find('p', class_='aditem-main--middle--price-shipping--price')
        raw_price = price_tag.text.strip() if price_tag else 'n/a'
        if raw_price != 'n/a':
            price, price_value = _parse_price(raw_price)
        else:
            price, price_value = 'n/a', 0
        link = "https://www.kleinanzeigen.de" + title_tag['href'] if title_tag else 'n/a'
        in_range = minprice <= price_value <= maxprice or 'vb' in raw_price.lower()
        has_unwanted_word = any(word.lower() in title for word in unwanted_words)
        # Compare against the RAW price text: the original compared the
        # reformatted '... €' string, so 'Zu verschenken' never matched.
        if in_range and not has_unwanted_word and raw_price not in unwanted_prices:
            items.append([title, price, link, price_value])
    return items


def search_item(query, unwanted_words, minprice, maxprice, maxpages):
    """Search all pages for *query* and return deduplicated, price-sorted rows.

    maxpages <= 0 means "no limit" (the original turned a blank form field
    into min(total_pages, 0) == 0 and searched nothing).
    """
    total_pages = get_total_pages(query)
    pages_to_search = min(total_pages, maxpages) if maxpages > 0 else total_pages
    proxies = load_proxies()
    start_proxy_updater(proxies)
    items = []
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = [
            executor.submit(
                fetch_page, query, page, unwanted_words, minprice, maxprice,
                ["Zu verschenken"], proxies,
            )
            for page in range(1, pages_to_search + 1)
        ]
        for future in futures:
            items.extend(future.result())
    # Sort results by numeric price.
    items.sort(key=lambda x: x[3])
    # Drop duplicate entries based on the link.
    unique_items = []
    seen_links = set()
    for item in items:
        if item[2] not in seen_links:
            unique_items.append(item)
            seen_links.add(item[2])
    return unique_items


@app.route('/')
def index():
    """Serve the search form."""
    return render_template('index.html')


@app.route('/search', methods=['POST'])
def search():
    """Run all submitted queries, write the results to an Excel file, and
    return the raw rows as JSON. Progress is tracked per session cookie."""
    # Default missing fields to '' so a partial form no longer raises
    # AttributeError on None.split().
    queries = [q.strip() for q in (request.form.get('query') or '').split('\n') if q.strip()]
    # Drop blank filter words: '' is a substring of every title, so a single
    # empty line used to filter out ALL results.
    unwanted_words = [
        w.strip() for w in (request.form.get('unwanted_words') or '').split('\n') if w.strip()
    ]
    minprice = float(request.form.get('minprice') or 0)
    maxprice = float(request.form.get('maxprice') or float('inf'))
    maxpages = int(request.form.get('maxpages') or 0)  # 0 -> search all pages
    # Sanitize the user-supplied filename (path-traversal guard) — the
    # original also ignored this field entirely and wrote to a fixed path.
    raw_name = request.form.get('filename') or 'kleinanzeigen_results'
    filename = re.sub(r'[^\w.-]', '_', raw_name)

    session_id = request.cookies.get('session')
    if session_id not in progress_data:
        progress_data[session_id] = {'current_item': 0, 'total_items': 0}

    all_items = []
    progress_data[session_id]['total_items'] = len(queries)
    for i, query in enumerate(queries):
        all_items.extend(search_item(query, unwanted_words, minprice, maxprice, maxpages))
        progress_data[session_id]['current_item'] = i + 1
        time.sleep(0.1)  # short delay so the progress display can update

    df = pd.DataFrame(all_items, columns=['Artikel', 'Preis', 'Link', 'PriceValue'])
    df = df.drop(columns=['PriceValue'])
    # Ensure the target directory exists even when not started via __main__
    # (e.g. under a WSGI server).
    os.makedirs('uploads', exist_ok=True)
    filepath = os.path.join('uploads', f'{filename}.xlsx')
    df.to_excel(filepath, index=False)
    session['filepath'] = filepath
    return jsonify(all_items)


@app.route('/progress')
def progress():
    """Return this session's scrape progress as JSON."""
    session_id = request.cookies.get('session')
    return jsonify(progress_data.get(session_id, {'current_item': 0, 'total_items': 0}))


@app.route('/download')
def download():
    """Send the Excel file produced by the most recent /search call."""
    filepath = session.get('filepath')
    if filepath and os.path.exists(filepath):
        return send_file(filepath, as_attachment=True)
    return "File not found", 404


if __name__ == '__main__':
    os.makedirs('uploads', exist_ok=True)
    app.run(debug=True)