Any way to download all images from a Lemmy community?
(lemmings.world)
There isn't a straightforward way to do it as far as I can see - most likely because instances usually don't want tonnes of requests for tonnes of data.
If you have some programming knowledge, it would be feasible to write a script that either uses the Lemmy API to fetch the posts or scrapes the pages directly.
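As a minimal sketch of the API half (assuming an instance that exposes the standard v3 API; the limit of 50 per page and the one-second pause are just polite defaults I've picked, not anything the API requires):

import time
import requests

INSTANCE = "https://lemmy.world"  # assumption: any instance exposing the v3 API

def fetch_page(community, page):
    # One request per page, then a short pause so the instance isn't hammered.
    resp = requests.get(
        f"{INSTANCE}/api/v3/post/list",
        params={"community_name": community, "page": page, "limit": 50},
        timeout=30,
    )
    resp.raise_for_status()
    time.sleep(1)  # crude rate limiting, per the concern above
    return resp.json().get("posts", [])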
People tend to hate on AI, but this is what it was made for.
all images part1 https://files.catbox.moe/1o0cgg.zip
all images part2 https://files.catbox.moe/t3pk4k.zip
AI-generated slop script:
import os
import logging
from urllib.parse import urlparse

import requests
from tqdm import tqdm

# Set up logging
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Config
COMMUNITY = "albumartporn"
LEMMY_INSTANCE = "https://lemmy.world"  # You can change this to a different instance
DEST_FOLDER = "albumartporn_images"
MAX_PAGES = 100  # Increased to download more images
SORT = "TopAll"  # Changed to get the best quality images first

os.makedirs(DEST_FOLDER, exist_ok=True)

def get_posts(page):
    url = f"{LEMMY_INSTANCE}/api/v3/post/list"
    params = {
        "community_name": COMMUNITY,
        "sort": SORT,
        "page": page
    }
    try:
        logger.debug(f"Fetching posts from page {page}")
        resp = requests.get(url, params=params, timeout=30)
        resp.raise_for_status()
        posts = resp.json().get("posts", [])
        logger.debug(f"Found {len(posts)} posts on page {page}")
        return posts
    except Exception as e:
        logger.error(f"Error fetching posts from page {page}: {e}")
        return []

def download_image(url, filename):
    try:
        logger.debug(f"Downloading image from {url}")
        resp = requests.get(url, stream=True, timeout=10)
        resp.raise_for_status()
        file_size = int(resp.headers.get('content-length', 0))
        logger.debug(f"Image size: {file_size} bytes")
        with open(filename, "wb") as f:
            for chunk in resp.iter_content(1024):
                f.write(chunk)
        logger.debug(f"Successfully downloaded {filename}")
    except Exception as e:
        logger.error(f"Failed to download {url}: {e}")

def is_image_url(url):
    is_img = url.lower().endswith((".jpg", ".jpeg", ".png", ".gif", ".webp"))
    logger.debug(f"URL {url} is image: {is_img}")
    return is_img

def main():
    logger.info(f"Starting download from {COMMUNITY} community")
    logger.info(f"Sorting by: {SORT}")
    logger.info(f"Maximum pages to process: {MAX_PAGES}")
    image_count = 0
    for page in range(1, MAX_PAGES + 1):
        logger.info(f"Processing page {page}/{MAX_PAGES}")
        posts = get_posts(page)
        if not posts:
            logger.warning(f"No more posts on page {page}.")
            break
        for post in tqdm(posts, desc=f"Page {page}"):
            post_data = post.get("post", {})
            url = post_data.get("url")
            if not url:
                logger.debug("Post has no URL, skipping")
                continue
            if not is_image_url(url):
                logger.debug(f"URL is not an image: {url}")
                continue
            parsed_url = urlparse(url)
            filename = os.path.basename(parsed_url.path)
            filepath = os.path.join(DEST_FOLDER, filename)
            if os.path.exists(filepath):
                logger.debug(f"File already exists: {filepath}")
                continue
            download_image(url, filepath)
            image_count += 1
    logger.info(f"Download complete. Downloaded {image_count} images.")

if __name__ == "__main__":
    main()
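To run it you'd need requests and tqdm installed (pip install requests tqdm). One caveat: is_image_url only matches URLs ending in a known extension, so image links without one (some pictrs URLs, for instance) get skipped. A hedged alternative is to ask the server for the Content-Type instead; the helper name here is made up for illustration:

import requests

def is_image_response(url):
    # Hypothetical replacement for is_image_url: trust the server's
    # Content-Type header rather than the file extension.
    try:
        resp = requests.head(url, allow_redirects=True, timeout=10)
        return resp.headers.get("Content-Type", "").startswith("image/")
    except requests.RequestException:
        return False

It costs one extra HEAD request per post, so it's a trade-off against the extension check rather than a strict improvement.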