Add files via upload

This commit is contained in:
2025-11-09 16:11:29 +01:00
committed by GitHub
commit 2cf0836dd1
15 changed files with 4070 additions and 0 deletions

View File

@@ -0,0 +1,20 @@
# Use the official slim Python runtime image as the base
FROM python:3.9-slim
# Set the working directory inside the container
WORKDIR /app
# Copy the requirements file on its own first so the dependency layer is
# cached independently of application code changes
COPY requirements.txt requirements.txt
# Install the Python dependencies; --no-cache-dir keeps pip's download cache
# out of the image layer
RUN pip install --no-cache-dir -r requirements.txt
# Copy the rest of the application code into the working directory
COPY . .
# Document the port the application listens on; it must match the gunicorn
# bind address in CMD below
EXPOSE 5000
# Start the application with gunicorn: 4 workers, bound to port 5000 on all
# interfaces, 120 second worker timeout
CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:5000", "--timeout", "120", "wsgi:app"]

View File

@@ -0,0 +1,74 @@
# Kleinanzeigen Preisabfrage
Dieses Projekt ermöglicht es, Preise von Artikeln auf Kleinanzeigen.de abzufragen und die Ergebnisse in einer Excel-Datei zu speichern.
## Projektstruktur
```
Kleinanzeigen-Preisabfrage/
├── Dockerfile
├── .gitlab-ci.yml
├── app.py
├── static/
│   ├── script.js
│   └── style.css
└── templates/
    └── index.html
```
## Voraussetzungen
- Docker
- GitLab CI/CD (optional)
## Installation
1. **Repository klonen**:
```sh
git clone https://git.dasposchi.de/DasPoschi/Kleinanzeigen-Preisabfrage
cd Kleinanzeigen-Preisabfrage
```
2. **Docker-Image bauen** (Image-Namen müssen bei Docker kleingeschrieben sein):
```sh
docker build -t kleinanzeigen-preisabfrage .
```
3. **Docker-Container starten**:
```sh
docker run -d -p 5000:5000 kleinanzeigen-preisabfrage
```
## Verwendung
1. Öffne deinen Browser und gehe zu `http://localhost:5000`.
2. Gib die Artikel, unerwünschten Wörter, Mindestpreis und maximale Seitenanzahl ein.
3. Klicke auf "Suchen", um die Preisabfrage zu starten.
4. Lade die Ergebnisse als Excel-Datei herunter.
## Deployment mit GitLab CI/CD
1. **.gitlab-ci.yml Datei**: Die Datei ist bereits im Projekt enthalten und konfiguriert.
2. **Repository zu GitLab hinzufügen**:
```sh
git remote add origin https://git.dasposchi.de/DasPoschi/Kleinanzeigen-Preisabfrage
git push -u origin master
```
3. GitLab CI/CD wird automatisch die Pipeline ausführen und das Projekt deployen.
## Dateien
- **Dockerfile**: Enthält die Anweisungen zum Bauen des Docker-Images.
- **.gitlab-ci.yml**: Definiert die CI/CD-Pipeline für GitLab.
- **app.py**: Flask-Anwendung, die die Preisabfrage durchführt.
- **static/script.js**: JavaScript-Datei für die Formularverarbeitung.
- **static/style.css**: CSS-Datei für das Styling der Anwendung.
- **templates/index.html**: HTML-Datei für die Benutzeroberfläche.
## Lizenz
Dieses Projekt ist unter der MIT-Lizenz lizenziert. Siehe die LICENSE Datei für weitere Details.
## Kontakt
Falls du Fragen oder Anmerkungen hast, kannst du mich unter dasposchi@gmail.com erreichen.

View File

@@ -0,0 +1,198 @@
from flask import Flask, request, render_template, jsonify, send_file, session
import requests
from bs4 import BeautifulSoup
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
import os
import json
import time
from flask_session import Session
import random
from threading import Thread, Event
app = Flask(__name__)
# Key used to sign session data. It is read from the SECRET_KEY environment
# variable so a real secret can be injected at deploy time; the hard-coded
# value is only a development fallback.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', 'supersecretkey')
# Keep session contents in files on the server (Flask-Session backend)
app.config['SESSION_TYPE'] = 'filesystem'
Session(app)
# Search progress per client, keyed by the value of the "session" cookie
progress_data = {}
# Proxies that passed the most recent check in update_running_proxies
runningproxies = []
# The proxy updater loop keeps running until this event is set
update_event = Event()
def load_proxies(timeout=10):
    """Download the public proxy lists and return them as proxy URLs.

    Every non-empty line of each list is prefixed with the list's scheme,
    producing entries of the form ``"<scheme>://<line>"``.

    Args:
        timeout: Seconds to wait for each list download.

    Returns:
        A list of proxy URL strings. Lists that cannot be downloaded or that
        do not answer with HTTP 200 are skipped, so the result may be empty.
    """
    urls = {
        "http": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt",
        "socks4": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks4.txt",
        "socks5": "https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt"
    }
    proxies = []
    for proxy_type, url in urls.items():
        try:
            # Without a timeout a stalled download would block the search
            response = requests.get(url, timeout=timeout)
        except requests.exceptions.RequestException as e:
            # One unreachable list must not prevent the others from loading
            print(f"Failed to load {proxy_type} proxy list. Error: {e}")
            continue
        if response.status_code != 200:
            continue
        proxies.extend(
            f"{proxy_type}://{line.strip()}"
            for line in response.text.splitlines()
            if line.strip()
        )
    return proxies
def check_proxy(proxy, timeout=1):
    """Return True if an HTTPS request routed through *proxy* succeeds.

    Args:
        proxy: Proxy URL including its scheme, e.g. ``"socks5://host:port"``.
        timeout: Seconds to wait for the probe request.

    Returns:
        True when the probe answers with HTTP 200, False on any other status
        or on any error.
    """
    try:
        # requests selects a proxy by the scheme of the *target* URL, so the
        # mapping must be keyed "http"/"https". Keying it by the proxy's own
        # type (e.g. "socks5") would send the probe directly, bypassing the
        # proxy under test.
        response = requests.get(
            "https://www.google.com",
            proxies={"http": proxy, "https": proxy},
            timeout=timeout,
        )
    except Exception:
        # Best-effort probe: any failure simply means the proxy is unusable
        return False
    return response.status_code == 200
def update_running_proxies(proxies):
    """Refresh the global list of working proxies until update_event is set.

    Each round checks the candidates in order with check_proxy, keeps the
    first 20 that pass, publishes them in ``runningproxies`` and then waits
    five minutes before the next round.

    Args:
        proxies: Candidate proxy URLs to check.
    """
    global runningproxies
    while not update_event.is_set():
        new_proxies = []
        for proxy in proxies:
            if len(new_proxies) >= 20:
                break
            if check_proxy(proxy):
                new_proxies.append(proxy)
        # Publish the whole list at once so readers never see a partial round
        runningproxies = new_proxies
        # Wait up to 5 minutes; unlike time.sleep this returns as soon as the
        # event is set, so a stop request is honoured immediately
        update_event.wait(300)
def start_proxy_updater(proxies):
    """Run update_running_proxies(proxies) in a background daemon thread."""
    Thread(
        target=update_running_proxies,
        args=(proxies,),
        daemon=True,
    ).start()
def get_random_proxy():
    """Return a random entry of ``runningproxies``.

    While the list is empty the call polls once per second, so it blocks
    until at least one proxy is available.
    """
    while not runningproxies:
        print("No running proxies available.")  # debug output
        time.sleep(1)  # brief pause before looking again
    chosen = random.choice(runningproxies)
    print(f"Selected proxy: {chosen}")  # debug output
    return chosen
def get_total_pages(query, timeout=10):
    """Return the number of result pages Kleinanzeigen reports for *query*.

    The first result page is fetched directly (without a proxy) and the
    second-to-last link inside the ``div.pagination`` element is read as the
    last page number.

    Args:
        query: Search keywords.
        timeout: Seconds to wait for the request.

    Returns:
        The page count, or 1 when there is no pagination element or its
        links cannot be interpreted as a page number.

    Raises:
        requests.exceptions.RequestException: If the request fails.
    """
    # Passing the keywords via params URL-encodes them, so characters such as
    # '&' or '#' in the query cannot corrupt the request URL
    response = requests.get(
        "https://www.kleinanzeigen.de/s-suchanfrage.html",
        params={"keywords": query, "page": 1},
        timeout=timeout,
    )
    soup = BeautifulSoup(response.text, 'html.parser')
    pagination = soup.find('div', class_='pagination')
    if not pagination:
        return 1
    pages = pagination.find_all('a')
    try:
        return int(pages[-2].text)
    except (IndexError, ValueError):
        # Fewer than two links, or a non-numeric label: assume a single page
        return 1
def _parse_price(price_text):
    """Return the numeric value of a cleaned price text, or 0 if it has none."""
    # German notation: '.' groups thousands and ',' marks the decimals
    normalized = price_text.replace('.', '').replace(',', '.')
    if normalized.replace('.', '', 1).isdecimal():
        return float(normalized)
    return 0


def fetch_page(query, page, unwanted_words, minprice, maxprice, unwanted_prices, proxies, max_attempts=10, timeout=10):
    """Fetch one result page through a random proxy and return its matches.

    Args:
        query: Search keywords.
        page: Result page number to fetch.
        unwanted_words: Listings whose title contains one of these words
            (case-insensitive) are dropped; blank entries are ignored.
        minprice: Lowest accepted price value.
        maxprice: Highest accepted price value.
        unwanted_prices: Price texts (e.g. "Zu verschenken") that exclude a
            listing.
        proxies: Unused; kept so existing callers keep working. The proxy is
            taken from get_random_proxy().
        max_attempts: Number of proxies to try before giving up on the page.
        timeout: Seconds to wait for each request.

    Returns:
        A list of ``[title, price, link, price_value]`` entries. A listing is
        kept when its price value lies within ``[minprice, maxprice]`` or its
        price is marked "VB". The list is empty when every attempt failed.
    """
    response = None
    for _ in range(max_attempts):
        proxy = get_random_proxy()
        try:
            # requests picks the proxy by the target URL's scheme, so the
            # mapping is keyed "http"/"https" rather than by proxy type
            response = requests.get(
                "https://www.kleinanzeigen.de/s-suchanfrage.html",
                params={"keywords": query, "page": page},
                proxies={"http": proxy, "https": proxy},
                timeout=timeout,
            )
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"Failed to fetch page {page} using proxy {proxy}. Error: {e}")  # debug output
            response = None
            continue
        print(f"Successfully fetched page {page} using proxy {proxy}.")  # debug output
        break
    if response is None:
        # Bounded retries: a page that no proxy can load must not hang the search
        print(f"Giving up on page {page} after {max_attempts} failed attempts.")
        return []

    # Blank entries are dropped because '' is a substring of every title
    blocked_words = [word.strip().lower() for word in unwanted_words if word.strip()]
    soup = BeautifulSoup(response.text, 'html.parser')
    items = []
    for item in soup.find_all('li', class_='ad-listitem'):
        title_tag = item.find('a', class_='ellipsis')
        title = title_tag.text.strip().lower() if title_tag else 'n/a'
        price_tag = item.find('p', class_='aditem-main--middle--price-shipping--price')
        raw_price = price_tag.text.strip() if price_tag else 'n/a'
        # Detect "VB" before it is stripped from the text below
        negotiable = 'vb' in raw_price.lower()
        if raw_price != 'n/a':
            price_text = raw_price.replace('€', '').replace('VB', '').strip()
            price_value = _parse_price(price_text)
            price = f"{price_text} €"  # displayed price with euro sign
        else:
            price_text = raw_price
            price_value = 0
            price = raw_price
        link = "https://www.kleinanzeigen.de" + title_tag['href'] if title_tag else 'n/a'
        in_range = minprice <= price_value <= maxprice
        has_blocked_word = any(word in title for word in blocked_words)
        # Compare the price text before the euro sign is appended, otherwise
        # entries such as "Zu verschenken" could never match
        if (in_range or negotiable) and not has_blocked_word and price_text not in unwanted_prices:
            items.append([title, price, link, price_value])
    return items
def search_item(query, unwanted_words, minprice, maxprice, maxpages):
    """Search Kleinanzeigen for *query* and return the matching listings.

    At most *maxpages* result pages are fetched, ten at a time in a thread
    pool. The matches are sorted by their numeric price value and listings
    whose link was already seen are dropped.

    Returns:
        A list of ``[title, price, link, price_value]`` entries.
    """
    page_count = min(get_total_pages(query), maxpages)
    proxies = load_proxies()
    start_proxy_updater(proxies)
    with ThreadPoolExecutor(max_workers=10) as pool:
        jobs = [
            pool.submit(fetch_page, query, page_no, unwanted_words, minprice, maxprice, ["Zu verschenken"], proxies)
            for page_no in range(1, page_count + 1)
        ]
        found = [entry for job in jobs for entry in job.result()]
    # Order the results by numeric price
    by_price = sorted(found, key=lambda entry: entry[3])
    # Keep only the first listing for every link
    unique_items = []
    seen_links = set()
    for entry in by_price:
        link = entry[2]
        if link in seen_links:
            continue
        seen_links.add(link)
        unique_items.append(entry)
    return unique_items
@app.route('/')
def index():
    """Render the search page from the index.html template."""
    page = render_template('index.html')
    return page
@app.route('/search', methods=['POST'])
def search():
    """Run the search for every submitted query and return the matches.

    Form fields read: ``query`` and ``unwanted_words`` (one entry per line),
    ``minprice``, ``maxprice``, ``maxpages`` and ``filename``. Progress is
    tracked in ``progress_data`` under the value of the "session" cookie. The
    results are written to ``uploads/<filename>.xlsx`` and that path is
    stored in the session for the download route.

    Returns:
        A JSON list of ``[title, price, link, price_value]`` entries.
    """
    # splitlines() copes with the CRLF line endings browsers send for
    # textareas; blank lines are dropped because an empty unwanted word
    # would match every title
    query_lines = (request.form.get('query') or '').splitlines()
    queries = [line.strip() for line in query_lines if line.strip()]
    word_lines = (request.form.get('unwanted_words') or '').splitlines()
    unwanted_words = [line.strip() for line in word_lines if line.strip()]
    minprice = float(request.form.get('minprice') or 0)
    maxprice = float(request.form.get('maxprice') or float('inf'))
    maxpages = int(request.form.get('maxpages') or 0)
    if maxpages <= 0:
        # A blank page limit means "no limit" rather than "search zero pages"
        maxpages = float('inf')
    requested_name = request.form.get('filename') or 'kleinanzeigen_results'
    # Keep only the last path component so the name cannot leave uploads/
    filename = os.path.basename(requested_name.replace('\\', '/')).strip() or 'kleinanzeigen_results'
    session_id = request.cookies.get('session')
    # Start from a fresh record so a previous search's counters do not leak
    progress_data[session_id] = {'current_item': 0, 'total_items': len(queries)}
    all_items = []
    for done, query in enumerate(queries, start=1):
        all_items.extend(search_item(query, unwanted_words, minprice, maxprice, maxpages))
        progress_data[session_id]['current_item'] = done
        time.sleep(0.1)  # short delay so the progress display can update
    df = pd.DataFrame(all_items, columns=['Artikel', 'Preis', 'Link', 'PriceValue'])
    df = df.drop(columns=['PriceValue'])
    # The directory is otherwise only created when app.py runs as a script,
    # which is not the case under a WSGI server
    os.makedirs('uploads', exist_ok=True)
    filepath = f'uploads/{filename}.xlsx'
    df.to_excel(filepath, index=False)
    session['filepath'] = filepath
    return jsonify(all_items)
@app.route('/progress')
def progress():
    """Return the caller's search progress as JSON.

    The record is looked up by the value of the "session" cookie; callers
    without a record receive zero counters.
    """
    no_progress = {'current_item': 0, 'total_items': 0}
    client_key = request.cookies.get('session')
    state = progress_data.get(client_key, no_progress)
    return jsonify(state)
@app.route('/download')
def download():
    """Send the file whose path is stored in the session as an attachment.

    Responds with 404 when the session holds no path or the file is gone.
    """
    filepath = session.get('filepath')
    if not filepath or not os.path.exists(filepath):
        return "File not found", 404
    return send_file(filepath, as_attachment=True)
if __name__ == '__main__':
    # exist_ok creates the directory only when it is missing, without the
    # gap between a separate existence check and the creation
    os.makedirs('uploads', exist_ok=True)
    # Development server with the debugger and reloader enabled
    app.run(debug=True)

View File

@@ -0,0 +1,9 @@
# Compose definition with a single service for the web application
services:
  web:
    # Build the image from the Dockerfile in this directory
    build: .
    ports:
      # host port : container port
      - "5000:5000"
    volumes:
      # Mount the project directory over /app in the container
      - .:/app
    environment:
      FLASK_ENV: development

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,9 @@
Flask==2.0.1
requests[socks]==2.25.1
beautifulsoup4==4.9.3
pandas==1.2.4
Flask-Session==0.3.2
gunicorn==20.1.0
Werkzeug==2.0.1
numpy==1.20.3
openpyxl==3.0.7

View File

@@ -0,0 +1,185 @@
// Result list returned by the last /search request
let allItems = [];
// 1-based number of the result page currently displayed
let currentPage = 1;
// How many results are rendered per page
let itemsPerPage = 10;
/**
 * Submit the current contents of the search form to /search and render
 * the returned results.
 *
 * Clears the result list, shows the overlay while the request is running
 * and starts progress polling.
 */
function searchItems() {
    // Look the elements up explicitly instead of relying on undeclared globals
    const form = document.getElementById('search-form');
    const overlayElement = document.getElementById('overlay');
    const formData = new FormData(form);
    document.getElementById('results').innerHTML = '';
    overlayElement.style.display = 'flex';
    fetch('/search', {
        method: 'POST',
        body: formData
    })
        .then(response => response.json())
        .then(data => {
            allItems = data;
            currentPage = 1;
            displayResults();
            overlayElement.style.display = 'none';
        })
        .catch(error => {
            console.error('Error:', error);
            overlayElement.style.display = 'none';
        });
    updateProgress();
}
/**
 * Poll /progress and show "Artikel X von Y" in the overlay.
 *
 * Polling continues for as long as the overlay is visible. The code that
 * started the search hides the overlay once the /search request settles,
 * which also ends the polling.
 */
function updateProgress() {
    fetch('/progress')
        .then(response => response.json())
        .then(data => {
            const totalItems = data.total_items;
            const currentItem = data.current_item;
            document.getElementById('progress-text').innerText = `Artikel ${currentItem} von ${totalItems}`;
            // Do not stop on currentItem >= totalItems: the first poll may
            // still report 0 of 0 (or the counters of a previous search),
            // which would end polling and hide the overlay mid-search.
            if (document.getElementById('overlay').style.display !== 'none') {
                setTimeout(updateProgress, 500); // poll every 500 ms
            }
        })
        .catch(error => console.error('Fehler beim Abrufen des Fortschritts:', error));
}
// Submit the search via fetch instead of a page reload, show the overlay
// while it runs and render the first result page when it finishes.
document.getElementById('search-form').addEventListener('submit', function(event) {
    event.preventDefault();
    const form = event.target;
    const formData = new FormData(form);
    // The quick-select checkboxes carry no name attribute, so FormData
    // leaves them out. Append every ticked value to the unwanted words,
    // one word per line like the textarea content.
    const tickedWords = Array.from(
        form.querySelectorAll('input[type="checkbox"]:checked'),
        box => box.value
    );
    if (tickedWords.length > 0) {
        const typedWords = (formData.get('unwanted_words') || '').trim();
        formData.set('unwanted_words', [typedWords, ...tickedWords].filter(Boolean).join('\n'));
    }
    const overlayElement = document.getElementById('overlay');
    overlayElement.style.display = 'flex';
    fetch('/search', {
        method: 'POST',
        body: formData
    })
        .then(response => response.json())
        .then(data => {
            allItems = data;
            currentPage = 1;
            displayResults();
            overlayElement.style.display = 'none';
        })
        .catch(error => {
            console.error('Fehler beim Abrufen der Ergebnisse:', error);
            overlayElement.style.display = 'none';
        });
    // Start updating progress
    updateProgress();
});
// Apply a new page size and jump back to the first page.
document.getElementById('items-per-page').addEventListener('change', event => {
    const selectedSize = event.target.value;
    itemsPerPage = Number.parseInt(selectedSize, 10);
    currentPage = 1;
    displayResults();
});
// Re-sort the result list in place by title or by numeric price, then redraw.
document.getElementById('sort-filter').addEventListener('change', event => {
    switch (event.target.value) {
        case 'alphabet':
            allItems.sort((left, right) => left[0].localeCompare(right[0]));
            break;
        case 'price':
            allItems.sort((left, right) => parseFloat(left[3]) - parseFloat(right[3]));
            break;
    }
    displayResults();
});
// Show only the results whose title contains the typed term (case-insensitive).
document.getElementById('manufacturer-filter').addEventListener('input', function(event) {
    const manufacturer = event.target.value.toLowerCase();
    const filteredItems = allItems.filter(item => item[0].toLowerCase().includes(manufacturer));
    // The filtered list is usually shorter; staying on a later page would
    // render an empty result list
    currentPage = 1;
    displayResults(filteredItems);
});
/**
 * Render the current page of `items` into #results and rebuild the
 * pagination controls.
 *
 * @param {Array} items Entries of the form [title, price, link, priceValue];
 *     defaults to the full result list.
 */
function displayResults(items = allItems) {
    const resultsDiv = document.getElementById('results');
    resultsDiv.innerHTML = '';
    const start = (currentPage - 1) * itemsPerPage;
    const end = start + itemsPerPage;
    items.slice(start, end).forEach(item => {
        const [title, price, link] = item;
        const itemDiv = document.createElement('div');
        itemDiv.classList.add('item');

        // Titles and prices are scraped from a third-party site. Building
        // DOM nodes and assigning textContent keeps any markup they contain
        // from being interpreted as HTML.
        const heading = document.createElement('h3');
        const anchor = document.createElement('a');
        anchor.href = link;
        anchor.target = '_blank';
        // Deny the opened page access to window.opener
        anchor.rel = 'noopener noreferrer';
        anchor.textContent = title;
        heading.appendChild(anchor);

        const priceLine = document.createElement('p');
        const priceLabel = document.createElement('b');
        priceLabel.textContent = 'Preis:';
        priceLine.append(priceLabel, ` ${price}`);

        itemDiv.append(heading, priceLine);
        resultsDiv.appendChild(itemDiv);
    });
    displayPagination(items.length);
}
/**
 * Rebuild the pagination controls above and below the result list.
 *
 * Up to nine pages are listed in full. Beyond that a window of seven page
 * numbers is shown, with "..." and the first/last page where needed.
 * Nothing is rendered when there are no results.
 *
 * @param {number} totalItems Number of results being paginated.
 */
function displayPagination(totalItems) {
    const paginationTopDiv = document.getElementById('pagination-top');
    const paginationBottomDiv = document.getElementById('pagination-bottom');
    paginationTopDiv.innerHTML = '';
    paginationBottomDiv.innerHTML = '';
    const totalPages = Math.ceil(totalItems / itemsPerPage);
    if (totalPages === 0) {
        return; // no results: leave both pagination areas empty
    }

    // Numbered button that is highlighted when it is the current page
    const numberButton = page =>
        `<span class="page-button ${page === currentPage ? 'active' : ''}" onclick="changePage(${page})">${page}</span>`;
    // Jump button for the first/last page next to an ellipsis (never highlighted)
    const jumpButton = page =>
        `<span class="page-button" onclick="changePage(${page})">${page}</span>`;
    const ellipsis = `<span class="page-button">...</span>`;
    // Numbered buttons for every page from `first` to `last` inclusive
    const numberRange = (first, last) => {
        const buttons = [];
        for (let page = first; page <= last; page++) {
            buttons.push(numberButton(page));
        }
        return buttons;
    };

    // "<" and ">" are written as entities so the markup stays valid HTML
    const parts = [
        `<span class="page-button" onclick="changePage(1)">&lt;&lt;</span>`,
        `<span class="page-button" onclick="changePage(currentPage - 1)">&lt;</span>`
    ];
    if (totalPages <= 9) {
        parts.push(...numberRange(1, totalPages));
    } else if (currentPage <= 5) {
        parts.push(...numberRange(1, 7), ellipsis, jumpButton(totalPages));
    } else if (currentPage > totalPages - 5) {
        parts.push(jumpButton(1), ellipsis, ...numberRange(totalPages - 6, totalPages));
    } else {
        parts.push(
            jumpButton(1),
            ellipsis,
            ...numberRange(currentPage - 3, currentPage + 3),
            ellipsis,
            jumpButton(totalPages)
        );
    }
    parts.push(
        `<span class="page-button" onclick="changePage(currentPage + 1)">&gt;</span>`,
        `<span class="page-button" onclick="changePage(${totalPages})">&gt;&gt;</span>`
    );

    const paginationHTML = parts.join('\n');
    paginationTopDiv.innerHTML = paginationHTML;
    paginationBottomDiv.innerHTML = paginationHTML;
}
/**
 * Switch to result page `page`, clamped to the valid range, and redraw.
 *
 * The term in #manufacturer-filter is applied again so that paging does
 * not discard an active filter.
 *
 * @param {number} page Requested 1-based page number.
 */
function changePage(page) {
    const term = document.getElementById('manufacturer-filter').value.toLowerCase();
    const items = term
        ? allItems.filter(item => item[0].toLowerCase().includes(term))
        : allItems;
    // At least one page, so an empty list cannot set currentPage to 0
    const totalPages = Math.max(1, Math.ceil(items.length / itemsPerPage));
    currentPage = Math.min(Math.max(page, 1), totalPages);
    displayResults(items);
}

View File

@@ -0,0 +1,172 @@
/* Page background and base font */
body {
font-family: Arial, sans-serif;
background-color: #f8f9fa;
margin: 0;
padding: 0;
}
/* Centered white card that holds the whole UI */
.container {
max-width: 800px;
margin: 50px auto;
padding: 20px;
background-color: #fff;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
border-radius: 5px;
}
/* Page title */
h1 {
text-align: center;
color: #333;
}
/* Field and section headings */
h2 {
margin-bottom: 10px;
color: #333;
}
/* Spacing below forms */
form {
margin-bottom: 20px;
}
/* Text inputs: 22px is subtracted from the width to make room for the
   10px padding and 1px border on each side */
input[type="text"],
input[type="number"],
textarea {
width: calc(100% - 22px);
padding: 10px;
margin-bottom: 10px;
border: 1px solid #ccc;
border-radius: 5px;
}
textarea {
height: 100px;
}
/* Row of form groups laid out side by side */
.form-row {
display: flex;
justify-content: space-between;
}
/* One column of a form row */
.form-group {
width: calc(50% - 11px);
}
/* Full-width blue buttons */
button {
width: 100%;
padding: 10px;
background-color: #007BFF;
color: #fff;
border: none;
border-radius: 5px;
cursor: pointer;
}
button:hover {
background-color: #0056b3;
}
/* Green variant for the element with id "download-button" */
#download-button {
background-color: #28a745;
margin-bottom: 20px;
}
#download-button:hover {
background-color: #218838;
}
/* Container that script.js fills with the result entries */
#results {
margin-top: 20px;
}
/* One result entry, separated from the next by a bottom border */
.item {
padding: 10px;
border-bottom: 1px solid #ccc;
}
/* Result title */
.item h3 {
margin: 0;
font-size: 18px;
}
/* Result price line */
.item p {
margin: 5px 0 0;
color: #555;
}
/* Full-screen, semi-transparent layer with its content centered in a column */
#overlay {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background: rgba(255, 255, 255, 0.9);
display: flex;
justify-content: center;
align-items: center;
z-index: 1000;
flex-direction: column;
}
/* Spinner: a grey ring with a blue top segment, rotated by the spin animation */
#throbber {
border: 16px solid #f3f3f3;
border-radius: 50%;
border-top: 16px solid #007BFF;
width: 120px;
height: 120px;
animation: spin 2s linear infinite;
}
/* One full clockwise turn */
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
#current-item {
margin-top: 20px;
font-size: 18px;
color: #333;
}
/* Box around the progress text */
#progress-container {
background: #fff;
padding: 20px;
border-radius: 5px;
text-align: center;
}
/* Grey track of the progress bar */
#progress-bar {
width: 100%;
background: #ccc;
border-radius: 5px;
overflow: hidden;
margin-top: 10px;
}
/* Blue fill of the progress bar; starts at zero width */
#progress-fill {
width: 0;
height: 20px;
background: #007BFF;
}
/* Pagination areas above and below the result list */
#pagination-top,
#pagination-bottom {
margin-top: 20px;
text-align: center;
}
/* One clickable page control */
.page-button {
padding: 10px;
margin: 5px;
background-color: #007BFF;
color: #fff;
border: none;
border-radius: 5px;
cursor: pointer;
}
.page-button:hover {
background-color: #0056b3;
}
/* Control for the page currently shown (same colour as the hover state) */
.page-button.active {
background-color: #0056b3;
}

View File

@@ -0,0 +1,85 @@
<!DOCTYPE html>
<!-- The page content is German, so the document language is "de" -->
<html lang="de">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Kleinanzeigen Preisabfrage</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
    <div class="container">
        <h1>Kleinanzeigen Preisabfrage</h1>
        <!-- Search form; script.js intercepts the submit and posts it to /search -->
        <form id="search-form">
            <div class="form-row">
                <div class="form-group">
                    <h2>Artikel suchen</h2>
                    <textarea id="query" name="query" placeholder="Artikel suchen... (jeder Artikel eine Zeile)" required></textarea>
                </div>
                <div class="form-group">
                    <h2>Unerwünschte Wörter</h2>
                    <!-- The textarea content must stay flush left: leading whitespace would become part of the words -->
                    <textarea id="unwanted_words" name="unwanted_words" placeholder="Unerwünschte Wörter (je Wort eine Zeile)">Suche
Tausche</textarea>
                    <div>
                        <label><input type="checkbox" value="Defekt"> Defekt</label>
                        <label><input type="checkbox" value="Kaputt"> Kaputt</label>
                        <label><input type="checkbox" value="Beschädigt"> Beschädigt</label>
                    </div>
                </div>
            </div>
            <div class="form-row">
                <div class="form-group">
                    <h2>Mindestpreis</h2>
                    <input type="number" id="minprice" name="minprice" placeholder="Mindestpreis">
                </div>
                <div class="form-group">
                    <h2>Höchstpreis</h2>
                    <input type="number" id="maxprice" name="maxprice" placeholder="Höchstpreis">
                </div>
                <div class="form-group">
                    <h2>Maximale Seiten</h2>
                    <input type="number" id="maxpages" name="maxpages" placeholder="Maximale Seiten">
                </div>
            </div>
            <button type="submit">Suchen</button>
        </form>
        <!-- Plain GET to /download, which sends the Excel file of the last search in this session -->
        <form action="{{ url_for('download') }}" method="get">
            <button type="submit" id="download-button">Ergebnisse als Excel-Datei herunterladen</button>
        </form>
        <!-- Client-side filter and sort controls for the loaded results -->
        <div class="form-row">
            <div class="form-group">
                <h2>Suche nach Begriff</h2>
                <input type="text" id="manufacturer-filter" placeholder="Begriff">
            </div>
            <div class="form-group">
                <h2>Sortieren nach</h2>
                <select id="sort-filter">
                    <option value="alphabet">Alphabet</option>
                    <option value="price">Preis</option>
                </select>
            </div>
        </div>
        <div class="form-row">
            <div class="form-group">
                <h2>Artikel pro Seite</h2>
                <select id="items-per-page">
                    <option value="10">10</option>
                    <option value="20">20</option>
                    <option value="50">50</option>
                    <option value="100">100</option>
                </select>
            </div>
        </div>
        <div id="pagination-top"></div>
        <div id="results"></div>
        <div id="pagination-bottom"></div>
        <!-- Overlay shown by script.js while a search is running -->
        <div id="overlay" style="display:none;">
            <div id="throbber"></div>
            <div id="progress-container">
                <p id="progress-text">Artikel 0 von 0</p>
            </div>
        </div>
    </div>
    <script src="{{ url_for('static', filename='script.js') }}"></script>
</body>
</html>

View File

@@ -0,0 +1,4 @@
from app import app
# Runs the Flask development server only when this module is executed
# directly; importing the module just exposes `app`.
if __name__ == '__main__':
    app.run()