Add files via upload
This commit is contained in:
198
Kleinanzeigen-Preisabfrage-main/app.py
Normal file
198
Kleinanzeigen-Preisabfrage-main/app.py
Normal file
@@ -0,0 +1,198 @@
|
||||
from flask import Flask, request, render_template, jsonify, send_file, session
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import pandas as pd
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
from flask_session import Session
|
||||
import random
|
||||
from threading import Thread, Event
|
||||
|
||||
app = Flask(__name__)
# NOTE(review): a hard-coded secret is unsafe in production. Reading from the
# environment is backward compatible: the old literal remains the fallback.
app.config['SECRET_KEY'] = os.environ.get('FLASK_SECRET_KEY', 'supersecretkey')
app.config['SESSION_TYPE'] = 'filesystem'
Session(app)

# Per-session scraping progress: session id -> {'current_item', 'total_items'}.
progress_data = {}
# Proxies that recently passed a health check; refreshed by a background thread.
runningproxies = []
# Set this event to stop the proxy-updater background loop.
update_event = Event()
|
||||
|
||||
def load_proxies():
    """Download public proxy lists and return them as scheme-prefixed URLs.

    Fetches the HTTP, SOCKS4 and SOCKS5 lists from the TheSpeedX/SOCKS-List
    repository and returns entries like ``"socks5://1.2.3.4:1080"``. A list
    that cannot be fetched is skipped instead of aborting the whole load.

    Returns:
        list[str]: all proxies that could be downloaded (possibly empty).
    """
    urls = {
        "http": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt",
        "socks4": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks4.txt",
        "socks5": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt"
    }
    proxies = []
    for proxy_type, url in urls.items():
        # Bug fix: the original had no timeout and no error handling, so a
        # single unreachable list raised and aborted the entire proxy load.
        try:
            response = requests.get(url, timeout=10)
        except requests.RequestException:
            continue
        if response.status_code == 200:
            proxies.extend(
                f"{proxy_type}://{line.strip()}"
                for line in response.text.splitlines()
                if line.strip()
            )
    return proxies
|
||||
|
||||
def check_proxy(proxy):
    """Return True if *proxy* can fetch google.com within one second.

    Args:
        proxy: scheme-prefixed proxy URL, e.g. ``"http://1.2.3.4:8080"``.
    """
    try:
        proxy_type = proxy.split("://")[0]
        response = requests.get("https://www.google.com",
                                proxies={proxy_type: proxy}, timeout=1)
        return response.status_code == 200
    # Bug fix: narrowed from a bare ``except:``, which also swallowed
    # KeyboardInterrupt and SystemExit.
    except requests.RequestException:
        return False
|
||||
|
||||
def update_running_proxies(proxies):
    """Background loop keeping ``runningproxies`` stocked with working proxies.

    Every 5 minutes, re-scans *proxies* in order and publishes the first 20
    candidates that pass the health check. Stops once ``update_event`` is set.
    """
    global runningproxies
    while not update_event.is_set():
        # Lazily health-check candidates; take at most the first 20 that work.
        healthy = (candidate for candidate in proxies if check_proxy(candidate))
        runningproxies = [candidate for _, candidate in zip(range(20), healthy)]
        time.sleep(300)  # wait 5 minutes before the next scan
|
||||
|
||||
def start_proxy_updater(proxies):
    """Spawn the daemon thread that keeps ``runningproxies`` fresh."""
    Thread(target=update_running_proxies, args=(proxies,), daemon=True).start()
|
||||
|
||||
def get_random_proxy():
    """Block until at least one healthy proxy exists, then return a random one."""
    while not runningproxies:
        print("No running proxies available.")  # debug output
        time.sleep(1)  # brief pause before polling the shared list again
    chosen = random.choice(runningproxies)
    print(f"Selected proxy: {chosen}")  # debug output
    return chosen
|
||||
|
||||
def get_total_pages(query):
    """Return the number of result pages kleinanzeigen.de reports for *query*.

    Falls back to 1 when the request fails, when there is no pagination
    block, or when the pagination markup does not contain a usable number.
    """
    url = f"https://www.kleinanzeigen.de/s-suchanfrage.html?keywords={query}&page=1"
    # Bug fix: added a timeout so a stalled connection cannot hang the search.
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        return 1
    soup = BeautifulSoup(response.text, 'html.parser')
    pagination = soup.find('div', class_='pagination')
    if not pagination:
        return 1
    pages = pagination.find_all('a')
    # The last link is "next"; the one before it carries the highest page
    # number. Bug fix: ``pages[-2]`` raised IndexError for a single link and
    # ``int(...)`` raised ValueError for non-numeric text.
    if len(pages) < 2 or not pages[-2].text.strip().isdigit():
        return 1
    return int(pages[-2].text.strip())
|
||||
|
||||
def fetch_page(query, page, unwanted_words, minprice, maxprice, unwanted_prices, proxies):
    """Scrape one result page of kleinanzeigen.de through a rotating proxy.

    Retries with a fresh random proxy until the page is fetched, then extracts
    title, price and link for every listing and applies the filters.

    Args:
        query: search keywords.
        page: 1-based result page number.
        unwanted_words: titles containing any of these words are dropped.
        minprice / maxprice: inclusive numeric price range.
        unwanted_prices: raw price strings to exclude (e.g. "Zu verschenken").
        proxies: unused here; kept for interface compatibility (rotation is
            driven by the shared ``runningproxies`` list).

    Returns:
        list of ``[title, price_display, link, price_value]`` rows.
    """
    while True:
        proxy = get_random_proxy()
        try:
            proxy_type = proxy.split("://")[0]
            url = f"https://www.kleinanzeigen.de/s-suchanfrage.html?keywords={query}&page={page}"
            # Bug fix: no timeout meant a hung proxy blocked this worker forever.
            response = requests.get(url, proxies={proxy_type: proxy}, timeout=15)
            response.raise_for_status()
            print(f"Successfully fetched page {page} using proxy {proxy}.")  # debug output
            break
        except requests.exceptions.RequestException as e:
            print(f"Failed to fetch page {page} using proxy {proxy}. Error: {e}")  # debug output
            continue

    soup = BeautifulSoup(response.text, 'html.parser')
    items = []

    for item in soup.find_all('li', class_='ad-listitem'):
        title_tag = item.find('a', class_='ellipsis')
        title = title_tag.text.strip().lower() if title_tag else 'n/a'

        price_tag = item.find('p', class_='aditem-main--middle--price-shipping--price')
        raw_price = price_tag.text.strip() if price_tag else 'n/a'

        if raw_price != 'n/a':
            cleaned = raw_price.replace('€', '').replace('VB', '').replace(',', '').strip()
            # Bug fix: German thousands separators ("1.234") made isdigit()
            # fail and silently zeroed the price value.
            numeric = cleaned.replace('.', '')
            price_value = float(numeric) if numeric.isdigit() else 0
            price = f"{cleaned} €"  # display string with euro sign
        else:
            price = raw_price
            price_value = 0

        link = "https://www.kleinanzeigen.de" + title_tag['href'] if title_tag else 'n/a'

        # Bug fixes: (1) the 'vb' (negotiable) check previously ran on the
        # cleaned string from which 'VB' had already been stripped, so it never
        # matched; test the raw text instead. (2) ``unwanted_prices`` holds raw
        # strings like "Zu verschenken", so compare against the raw price text
        # rather than the "…€"-suffixed display string (which never matched).
        if (minprice <= price_value <= maxprice or 'vb' in raw_price.lower()) \
                and not any(word.lower() in title for word in unwanted_words) \
                and raw_price not in unwanted_prices:
            items.append([title, price, link, price_value])

    return items
|
||||
|
||||
def search_item(query, unwanted_words, minprice, maxprice, maxpages):
    """Search kleinanzeigen.de for *query* across up to *maxpages* pages.

    Loads the proxy pool, starts the background health-checker, scrapes the
    result pages concurrently, then returns the matching listings sorted by
    price with duplicate links removed.
    """
    total_pages = get_total_pages(query)
    pages_to_search = min(total_pages, maxpages)
    proxies = load_proxies()
    start_proxy_updater(proxies)

    results = []
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = [
            executor.submit(fetch_page, query, page, unwanted_words,
                            minprice, maxprice, ["Zu verschenken"], proxies)
            for page in range(1, pages_to_search + 1)
        ]
        for future in futures:
            results.extend(future.result())

    # Cheapest listings first (price_value is column 3).
    results.sort(key=lambda row: row[3])

    # Drop duplicate ads, keeping the first (cheapest) occurrence per link.
    seen_links = set()
    unique_items = []
    for row in results:
        link = row[2]
        if link not in seen_links:
            seen_links.add(link)
            unique_items.append(row)

    return unique_items
|
||||
|
||||
@app.route('/')
def index():
    """Serve the search form page (templates/index.html)."""
    return render_template('index.html')
|
||||
|
||||
@app.route('/search', methods=['POST'])
def search():
    """Run the searches posted from the form and return all matches as JSON.

    Form fields: ``query`` (newline-separated search terms),
    ``unwanted_words`` (newline-separated title exclusions), ``minprice``,
    ``maxprice``, ``maxpages``, ``filename`` (Excel output name). Results are
    also written to an .xlsx file whose path is stored in the session for the
    /download route; per-session progress feeds the /progress route.
    """
    # Bug fix: ``request.form.get(...)`` returns None for a missing field and
    # ``.split`` then raised AttributeError. Default to the empty string.
    queries = request.form.get('query', '').split('\n')
    unwanted_words = request.form.get('unwanted_words', '').split('\n')
    minprice = float(request.form.get('minprice') or 0)
    maxprice = float(request.form.get('maxprice') or float('inf'))
    maxpages = int(request.form.get('maxpages') or 0)
    filename = request.form.get('filename') or 'kleinanzeigen_results'

    session_id = request.cookies.get('session')
    if session_id not in progress_data:
        progress_data[session_id] = {'current_item': 0, 'total_items': 0}

    all_items = []
    progress_data[session_id]['total_items'] = len(queries)

    for i, query in enumerate(queries):
        query = query.strip()
        if query:
            items = search_item(query, unwanted_words, minprice, maxprice, maxpages)
            all_items.extend(items)
        progress_data[session_id]['current_item'] = i + 1
        time.sleep(0.1)  # short pause so the progress endpoint observes updates

    df = pd.DataFrame(all_items, columns=['Artikel', 'Preis', 'Link', 'PriceValue'])
    df = df.drop(columns=['PriceValue'])  # helper column used only for sorting
    # Bug fix: the user-supplied filename was ignored (the path was
    # hard-coded). basename() strips directory components so a crafted
    # filename cannot escape the uploads/ directory.
    safe_name = os.path.basename(filename.strip()) or 'kleinanzeigen_results'
    filepath = f'uploads/{safe_name}.xlsx'
    df.to_excel(filepath, index=False)

    session['filepath'] = filepath
    return jsonify(all_items)
|
||||
|
||||
@app.route('/progress')
def progress():
    """Report the caller's scraping progress as JSON."""
    sid = request.cookies.get('session')
    idle = {'current_item': 0, 'total_items': 0}
    return jsonify(progress_data.get(sid, idle))
|
||||
|
||||
@app.route('/download')
def download():
    """Send the Excel file produced by the last /search call, or 404."""
    filepath = session.get('filepath')
    # Guard clause: nothing to serve if no search ran or the file vanished.
    if not filepath or not os.path.exists(filepath):
        return "File not found", 404
    return send_file(filepath, as_attachment=True)
|
||||
|
||||
if __name__ == '__main__':
    # exist_ok avoids the race between the existence check and makedirs that
    # the original exists()/makedirs() pair had.
    os.makedirs('uploads', exist_ok=True)
    app.run(debug=True)
|
||||
Reference in New Issue
Block a user