Practical examples of consuming PaperStack API data — building question banks, filters, and analytics.

Using the Data

This guide shows practical patterns for working with PaperStack data in your applications.

Fetching Papers

Full Paper vs Subject Extracts

You can fetch either the full paper (all subjects merged) or individual subject files:

import requests

headers = {"Authorization": "Bearer ps_xxxxx"}
base = "https://api.paperstack.qzz.io"

# Full paper — questions in global order (1-200)
resp = requests.get(f"{base}/neet/2024/s1/paper.json", headers=headers, timeout=10)
resp.raise_for_status()  # stop on a 4xx/5xx status before trying to parse the body
paper = resp.json()
print(f"Total questions: {paper['total']}")

# Subject extract — questions renumbered 1..N
resp = requests.get(f"{base}/neet/2024/s1/physics.json", headers=headers, timeout=10)
resp.raise_for_status()
physics = resp.json()
print(f"Physics questions: {physics['total']}")

Shift Discovery

Always discover available shifts before querying:

# List years (sent with the same Authorization header as every other request)
resp = requests.get(f"{base}/neet", headers=headers, timeout=10)
resp.raise_for_status()  # stop on a 4xx/5xx status before trying to parse the body
years_resp = resp.json()
print(f"Available years: {years_resp['years']}")

# List shifts for a year
resp = requests.get(f"{base}/neet/2024", headers=headers, timeout=10)
resp.raise_for_status()
shifts_resp = resp.json()
for shift in shifts_resp["shifts"]:
    print(f"{shift['shift']}: {shift['total']} questions, checksum: {shift['checksum']}")

Filtering Questions

By Subject and Type

import json

resp = requests.get(f"{base}/neet/2024/s1/paper.json", headers=headers, timeout=10)
resp.raise_for_status()  # stop on a 4xx/5xx status before trying to parse the body
data = resp.json()

# Split the paper by the "type" field of each question.
mcqs = [q for q in data["questions"] if q["type"] == "mcq"]
nat_questions = [q for q in data["questions"] if q["type"] == "nat"]
print(f"MCQs: {len(mcqs)}, NATs: {len(nat_questions)}")

By Topic

from collections import Counter

# Tally how many questions carry each "topic" value, then print the ten most frequent.
topic_counts = Counter()
for q in data["questions"]:
    topic_counts[q["topic"]] += 1

ten_most_frequent = topic_counts.most_common(10)
for topic, count in ten_most_frequent:
    print(f"{topic}: {count} questions")

By Marks

# One pass over the paper: collect questions worth 4+ marks and add up all marks.
high_value = []
total_marks = 0
for q in data["questions"]:
    total_marks += q["marks"]
    if q["marks"] >= 4:
        high_value.append(q)

# Find high-value questions
print(f"High-value questions: {len(high_value)}")

# Calculate total marks
print(f"Total possible marks: {total_marks}")

Building a Question Bank

Create a Random Practice Set

import random

def create_practice_set(questions, count=10):
    """Pick a random practice set from *questions*.

    At most ``count`` questions are drawn without replacement (all of them
    when fewer are available). Each result is a new dict holding only the
    fields id, question (copied from "text"), type, options, topic and marks.
    """
    # (output key, source key) pairs, in the order they appear in each result dict.
    field_map = (
        ("id", "id"),
        ("question", "text"),
        ("type", "type"),
        ("options", "options"),
        ("topic", "topic"),
        ("marks", "marks"),
    )
    sample_size = min(count, len(questions))
    practice = []
    for picked in random.sample(questions, sample_size):
        practice.append({out_key: picked[src_key] for out_key, src_key in field_map})
    return practice

practice = create_practice_set(data["questions"], 10)
print(json.dumps(practice, indent=2))

Topic-Focused Practice

def get_questions_by_topic(questions, topic):
    """Return the questions whose "topic" value equals *topic* exactly, in input order."""
    matches = []
    for candidate in questions:
        if candidate["topic"] == topic:
            matches.append(candidate)
    return matches

# Practice specific topics
kinematics = get_questions_by_topic(data["questions"], "kinematics")
print(f"Found {len(kinematics)} kinematics questions")

Working with Diagrams

Download All Diagrams

import os

def download_diagrams(questions, headers, base, exam, year, shift):
    """Download every diagram referenced by *questions* into ./diagrams.

    For each entry in a question's "diagrams" list, the entry's "file" path is
    fetched from ``{base}/{exam}/{year}/{shift}/{file}`` and saved as
    ``diagrams/<question id>_<file basename>``. Questions with no diagrams
    are skipped. A diagram whose request does not return HTTP 200 is reported
    and skipped rather than written to disk.
    """
    os.makedirs("diagrams", exist_ok=True)
    for q in questions:
        # `or ()` covers both a missing "diagrams" key and an empty/None value.
        for diagram in q.get("diagrams") or ():
            file = diagram["file"]
            resp = requests.get(
                f"{base}/{exam}/{year}/{shift}/{file}",
                headers=headers,
                timeout=30,
            )
            if resp.status_code != 200:
                # Don't save an error body under an image file name.
                print(f"Skipped {file}: HTTP {resp.status_code}")
                continue
            local_name = f"{q['id']}_{os.path.basename(file)}"
            with open(os.path.join("diagrams", local_name), "wb") as f:
                f.write(resp.content)
            print(f"Downloaded: {local_name}")

Analytics

Topic Distribution

import json
from collections import Counter
import matplotlib.pyplot as plt

topics = Counter(q["topic"] for q in data["questions"])

# Plot top 15 topics
top_15 = topics.most_common(15)
if top_15:  # with no topics, zip(*top_15) yields nothing and the unpacking would raise
    labels, values = zip(*top_15)

    plt.figure(figsize=(10, 6))
    plt.barh(range(len(labels)), values)
    plt.yticks(range(len(labels)), labels)
    plt.xlabel("Question count")
    # `data` is the full paper.json payload, so the title names the paper, not one subject.
    plt.title("Topic Distribution — NEET 2024")
    plt.gca().invert_yaxis()  # most frequent topic at the top
    plt.tight_layout()
    plt.savefig("topic_distribution.png")

Difficulty Analysis

from collections import Counter

# Count questions per "difficulty" value; questions without the key are counted under None.
difficulty = Counter(q.get("difficulty") for q in data["questions"])
total = len(data["questions"])  # computed once instead of on every loop iteration
for level, count in difficulty.most_common():
    print(f"{level or 'unrated'}: {count} questions ({count/total*100:.0f}%)")

Type Breakdown

# Share of each question "type" in the paper, most common first.
type_counts = Counter()
for q in data["questions"]:
    type_counts[q["type"]] += 1

total = len(data["questions"])
by_frequency = type_counts.most_common()
for qtype, count in by_frequency:
    print(f"{qtype}: {count} ({count/total*100:.0f}%)")

Data Verification

Every dataset includes a SHA-256 checksum for integrity verification:

import hashlib

# Compute checksum locally over the payload *without* its own "checksum" field:
# a digest cannot cover the field that stores that digest.
# NOTE(review): assumes the server hashes the compact JSON (no whitespace, same key
# order) of the remaining fields — confirm against the API's checksum specification.
payload = {key: value for key, value in data.items() if key != "checksum"}
content = json.dumps(payload, separators=(",", ":")).encode("utf-8")
local_hash = hashlib.sha256(content).hexdigest()

# Compare with API-reported checksum (explicit raise: `assert` is skipped under `python -O`)
if local_hash != data["checksum"]:
    raise ValueError("Data integrity check failed!")
print("Checksum verified ✓")

JavaScript / TypeScript Examples

const headers = { Authorization: "Bearer ps_xxxxx" };
const base = "https://api.paperstack.qzz.io";

/**
 * Fetch one JSON file for an exam/year/shift and return the parsed body.
 * Throws when the response status is not in the 2xx range, so an error
 * payload is never treated as paper data.
 */
async function fetchPaper(exam, year, shift, file) {
  const res = await fetch(`${base}/${exam}/${year}/${shift}/${file}`, { headers });
  if (!res.ok) {
    throw new Error(`Request for ${exam}/${year}/${shift}/${file} failed: HTTP ${res.status}`);
  }
  return res.json();
}

// Usage
const paper = await fetchPaper("neet", "2024", "s1", "paper.json");
console.log(`Schema: ${paper.schema}, Questions: ${paper.total}`);
console.log(`Checksum: ${paper.checksum}`);

// Group by subject
const bySubject = {};
for (const q of paper.questions) {
  (bySubject[q.subject] ??= []).push(q);
}
console.log(Object.keys(bySubject));

Response Validation

Check that the response matches the expected structure:

def validate_paper(data):
    """Check that *data* has the expected top-level paper structure.

    Verifies that the required keys are present, that "schema" is "v4", and
    that the number of entries in "questions" equals "total". Prints a
    confirmation line on success.

    Raises:
        AssertionError: on the first failed check. The exception is raised
            explicitly rather than via ``assert`` so the checks still run
            under ``python -O``.
    """
    required = ["schema", "exam", "year", "shift", "subjects", "total", "questions"]
    for field in required:
        if field not in data:
            raise AssertionError(f"Missing field: {field}")
    if data["schema"] != "v4":
        raise AssertionError(f"Unexpected schema: {data['schema']}")
    if len(data["questions"]) != data["total"]:
        raise AssertionError("Question count mismatch")
    print("Response valid ✓")

Data Freshness

How to Detect Stale Data

Every dataset includes two timestamp fields:

Field	Location	Description
`scrapedAt`	Response root	When the paper was last scraped from the source PDF
`generatedAt`	`provenance` object	When the JSON dataset was generated

from datetime import datetime, timezone


def is_stale(response_data, max_age_days=30):
    """Return True when the dataset's "scrapedAt" timestamp is too old.

    The ISO-8601 value in ``response_data["scrapedAt"]`` is compared with the
    current UTC time; the data is stale when its age exceeds ``max_age_days``
    whole days.
    """
    # A trailing "Z" is rewritten to "+00:00" because fromisoformat() only
    # accepts it on Python 3.11+.
    scraped = datetime.fromisoformat(response_data["scrapedAt"].replace("Z", "+00:00"))
    if scraped.tzinfo is None:
        # NOTE(review): a timestamp without an offset is treated as UTC — confirm
        # with the API; without this, the subtraction below raises TypeError.
        scraped = scraped.replace(tzinfo=timezone.utc)
    age = datetime.now(timezone.utc) - scraped
    return age.days > max_age_days

if is_stale(data):
    print("Warning: Data may be outdated. Check for newer versions.")

Update Frequency

Exam	Update Pattern	Announcement
NEET	Within 24h of official answer key release	Blog + newsletter
JEE Main	Within 48h of each session	Blog + newsletter
JEE Advanced	Within 48h of result	Blog + newsletter

Changes are announced via the changelog and the `api.version_deprecated` webhook event.

Batch Downloading All Papers

Download everything for offline analysis:

import requests
import time

def download_all_papers(exam, api_key, max_years=5):
    """Download paper.json for every shift of the first *max_years* years of *exam*.

    Years and shifts are discovered through the listing endpoints; the first
    ``max_years`` entries of the returned "years" list are used, in the order
    the API returns them. A failed listing request raises immediately. A paper
    that does not return HTTP 200 is reported and skipped.

    Returns:
        A list of parsed paper payloads, in download order.
    """
    headers = {"Authorization": f"Bearer {api_key}"}
    base = "https://api.paperstack.qzz.io"

    def get(url):
        # One place for the shared auth header and timeout.
        return requests.get(url, headers=headers, timeout=30)

    # Discover years
    years_resp = get(f"{base}/{exam}")
    years_resp.raise_for_status()  # nothing can be listed without this response
    years = years_resp.json()["years"][:max_years]

    all_papers = []
    for year in years:
        shifts_resp = get(f"{base}/{exam}/{year}")
        shifts_resp.raise_for_status()
        for shift_info in shifts_resp.json()["shifts"]:
            shift = shift_info["shift"]
            resp = get(f"{base}/{exam}/{year}/{shift}/paper.json")
            if resp.status_code == 200:
                all_papers.append(resp.json())
                print(f"Downloaded {exam}/{year}/{shift}")
            else:
                # Report the gap so a short result list is not mistaken for a full one.
                print(f"Skipped {exam}/{year}/{shift}: HTTP {resp.status_code}")
            time.sleep(0.5)  # Respect rate limits

    return all_papers

papers = download_all_papers("neet", "ps_xxxxx", max_years=3)
print(f"Downloaded {len(papers)} papers")

Using Cached Checksums for Skip Logic

The shifts endpoint returns pre-computed checksums. Use them to avoid re-downloading unchanged papers:

def get_cached_checksums(cache_file="checksums.json"):
    """Load the checksum cache from *cache_file*.

    Returns the parsed JSON content, or an empty dict when the file does not exist.
    """
    try:
        handle = open(cache_file)
    except FileNotFoundError:
        # No cache file yet: start with an empty cache.
        return {}
    with handle:
        return json.load(handle)

def needs_update(local_checksums, remote_shift):
    """Return True when the cached checksum for a shift differs from the remote one.

    The cache is looked up by ``remote_shift["shift"]`` alone; a shift that is
    not in the cache always counts as needing an update.
    """
    return local_checksums.get(remote_shift["shift"]) != remote_shift["checksum"]

Rate Limiting Best Practices

import time

def fetch_with_retry(url, headers, max_retries=3, timeout=30):
    """GET *url* and return its parsed JSON body, retrying on HTTP 429.

    Up to ``max_retries`` attempts are made. After a 429 the function waits
    ``min(2 ** attempt, 30)`` seconds before the next attempt; no wait is
    spent after the final attempt.

    Raises:
        requests.HTTPError: for any other 4xx/5xx status (via raise_for_status).
        RuntimeError: when every attempt was rate limited, or when the server
            answers with a non-200 status that is not an error.
    """
    for attempt in range(max_retries):
        resp = requests.get(url, headers=headers, timeout=timeout)
        if resp.status_code == 200:
            return resp.json()
        if resp.status_code != 429:
            resp.raise_for_status()
            # A non-error, non-200 status: retrying would only repeat the request.
            raise RuntimeError(f"Unexpected status {resp.status_code} for {url}")
        if attempt == max_retries - 1:
            break  # out of attempts: fail now instead of sleeping first
        wait = min(2 ** attempt, 30)
        print(f"Rate limited. Waiting {wait}s...")
        time.sleep(wait)
    raise RuntimeError("Max retries exceeded")

Was this page helpful?

Using the Data

On this page