Using the Data

Practical examples of consuming PaperStack API data — building question banks, filters, and analytics.

Using the Data

This guide shows practical patterns for working with PaperStack data in your applications.

Fetching Papers

Full Paper vs Subject Extracts

You can fetch either the full paper (all subjects merged) or individual subject files:

import requests

# Shared request configuration: bearer-token auth header and API root.
headers = {"Authorization": "Bearer ps_xxxxx"}
base = "https://api.paperstack.qzz.io"

# Full paper — every subject merged, questions in global order (1-200)
paper_resp = requests.get(f"{base}/neet/2024/s1/paper.json", headers=headers)
paper = paper_resp.json()
print(f"Total questions: {paper['total']}")

# Subject extract — a single subject, questions renumbered 1..N
physics_resp = requests.get(f"{base}/neet/2024/s1/physics.json", headers=headers)
physics = physics_resp.json()
print(f"Physics questions: {physics['total']}")

Shift Discovery

Always discover available shifts before querying:

# List years. The auth headers are sent here too, so the discovery calls
# behave like every other request made to the API in this guide.
years_resp = requests.get(f"{base}/neet", headers=headers).json()
print(f"Available years: {years_resp['years']}")

# List shifts for a year; each entry reports its shift id, question total
# and checksum.
shifts_resp = requests.get(f"{base}/neet/2024", headers=headers).json()
for shift in shifts_resp["shifts"]:
    print(f"{shift['shift']}: {shift['total']} questions, checksum: {shift['checksum']}")

Filtering Questions

By Subject and Type

import json

data = requests.get(f"{base}/neet/2024/s1/paper.json", headers=headers).json()

# Split the questions by type in a single pass over the paper.
mcqs, nat_questions = [], []
for q in data["questions"]:
    if q["type"] == "mcq":
        mcqs.append(q)
    elif q["type"] == "nat":
        nat_questions.append(q)
print(f"MCQs: {len(mcqs)}, NATs: {len(nat_questions)}")

By Topic

from collections import Counter

# Tally how many questions each topic has, then show the ten largest.
topic_counts = Counter()
for question in data["questions"]:
    topic_counts[question["topic"]] += 1
for topic, count in topic_counts.most_common(10):
    print(f"{topic}: {count} questions")

By Marks

# Find high-value questions (those worth 4 marks or more)
high_value = list(filter(lambda q: q["marks"] >= 4, data["questions"]))
print(f"High-value questions: {len(high_value)}")

# Calculate total marks by adding up every question's marks
total_marks = 0
for q in data["questions"]:
    total_marks += q["marks"]
print(f"Total possible marks: {total_marks}")

Building a Question Bank

Create a Random Practice Set

import random

def create_practice_set(questions, count=10):
    """Return a random practice set of trimmed-down question records.

    Picks up to ``count`` questions at random (without repetition) and keeps
    only the fields a practice UI needs; the question's ``text`` is exposed
    under the ``question`` key. When fewer than ``count`` questions are
    available, all of them are returned in random order.
    """
    # (output key, source key) pairs, in the order they appear in each record.
    exported_fields = (
        ("id", "id"),
        ("question", "text"),
        ("type", "type"),
        ("options", "options"),
        ("topic", "topic"),
        ("marks", "marks"),
    )
    sample_size = min(count, len(questions))
    practice_set = []
    for picked in random.sample(questions, sample_size):
        practice_set.append(
            {out_key: picked[src_key] for out_key, src_key in exported_fields}
        )
    return practice_set

# Draw ten random questions from the paper and pretty-print them as JSON.
practice = create_practice_set(data["questions"], count=10)
practice_json = json.dumps(practice, indent=2)
print(practice_json)

Topic-Focused Practice

def get_questions_by_topic(questions, topic):
    """Return the questions whose ``topic`` equals ``topic``, in original order."""
    matches = []
    for question in questions:
        if question["topic"] == topic:
            matches.append(question)
    return matches

# Practice specific topics: pull every kinematics question from the paper
kinematics = get_questions_by_topic(data["questions"], topic="kinematics")
kinematics_count = len(kinematics)
print(f"Found {kinematics_count} kinematics questions")

Working with Diagrams

Download All Diagrams

import os

def download_diagrams(questions, headers, base, exam, year, shift):
    """Download every diagram referenced by ``questions`` into ``./diagrams``.

    Each diagram is fetched from ``{base}/{exam}/{year}/{shift}/{file}`` and
    saved as ``<question id>_<file basename>`` inside a ``diagrams`` directory
    (created if it does not exist). A line is printed per saved file.

    Raises:
        requests.HTTPError: if a diagram request returns an error status, so
            an error page is never written to disk as if it were the image.
    """
    out_dir = "diagrams"
    os.makedirs(out_dir, exist_ok=True)
    for q in questions:
        # Questions with no "diagrams" entry (or an empty one) are skipped.
        for diagram in q.get("diagrams") or []:
            file = diagram["file"]
            resp = requests.get(
                f"{base}/{exam}/{year}/{shift}/{file}",
                headers=headers,
            )
            # Fail loudly on 4xx/5xx instead of saving the error body.
            resp.raise_for_status()
            # Prefix with the question id so equal basenames do not collide.
            local_name = f"{q['id']}_{os.path.basename(file)}"
            with open(os.path.join(out_dir, local_name), "wb") as f:
                f.write(resp.content)
            print(f"Downloaded: {local_name}")

Analytics

Topic Distribution

import json
from collections import Counter
import matplotlib.pyplot as plt

# Count questions per topic across the loaded dataset.
topics = Counter(q["topic"] for q in data["questions"])

# Plot top 15 topics
top_15 = topics.most_common(15)
labels, values = zip(*top_15)
positions = range(len(labels))  # one horizontal bar slot per topic

plt.figure(figsize=(10, 6))
plt.barh(positions, values)
plt.yticks(positions, labels)
plt.xlabel("Question count")
# Plain string literal: the title has no placeholders, so no f-prefix is needed.
# NOTE(review): the title says "Physics" — confirm `data` holds the physics
# extract rather than the full paper before publishing the chart.
plt.title("Topic Distribution — NEET 2024 Physics")
plt.gca().invert_yaxis()  # flip the axis so the first (most common) topic is on top
plt.tight_layout()
plt.savefig("topic_distribution.png")

Difficulty Analysis

from collections import Counter

# Questions without a "difficulty" value are counted under None and
# reported as "unrated".
difficulty = Counter(q.get("difficulty") for q in data["questions"])
question_total = len(data["questions"])
for level, count in difficulty.most_common():
    label = level or 'unrated'
    share = count / question_total * 100
    print(f"{label}: {count} questions ({share:.0f}%)")

Type Breakdown

# Tally questions per type, then report each type's share of the paper.
type_counts = Counter()
for q in data["questions"]:
    type_counts[q["type"]] += 1
total = sum(type_counts.values())
for qtype, count in type_counts.most_common():
    percent = count / total * 100
    print(f"{qtype}: {count} ({percent:.0f}%)")

Data Verification

Every dataset includes a SHA-256 checksum for integrity verification:

import hashlib

# Compute checksum locally
content = json.dumps(data, separators=(",", ":")).encode("utf-8")
local_hash = hashlib.sha256(content).hexdigest()

# Compare with API-reported checksum
assert local_hash == data["checksum"], "Data integrity check failed!"
print("Checksum verified ✓")

JavaScript / TypeScript Examples

// Request configuration shared by every call: bearer-token auth header and API root.
const headers = { Authorization: "Bearer ps_xxxxx" };
const base = "https://api.paperstack.qzz.io";

/**
 * Fetch one file of a paper and parse the response body as JSON.
 *
 * @param exam  exam path segment, e.g. "neet"
 * @param year  year path segment, e.g. "2024"
 * @param shift shift path segment, e.g. "s1"
 * @param file  file name, e.g. "paper.json"
 * @returns the parsed JSON body
 * @throws Error when the server answers with a non-2xx status, so an error
 *         response is never parsed and used as if it were a paper
 */
async function fetchPaper(exam, year, shift, file) {
  const res = await fetch(`${base}/${exam}/${year}/${shift}/${file}`, { headers });
  if (!res.ok) {
    throw new Error(`Request failed: ${res.status} ${res.statusText}`);
  }
  return res.json();
}

// Usage: load the full paper and report its metadata
const paper = await fetchPaper("neet", "2024", "s1", "paper.json");
console.log(`Schema: ${paper.schema}, Questions: ${paper.total}`);
console.log(`Checksum: ${paper.checksum}`);

// Group by subject: one array of questions per subject name
const bySubject = {};
paper.questions.forEach((question) => {
  if (bySubject[question.subject] == null) {
    bySubject[question.subject] = [];
  }
  bySubject[question.subject].push(question);
});
console.log(Object.keys(bySubject));

Response Validation

Check that the response matches the expected structure:

def validate_paper(data):
    """Check that a paper response has the expected top-level structure.

    Verifies that every required field is present, that the schema version is
    ``"v4"``, and that the number of questions equals the reported ``total``.
    Prints a confirmation line when all checks pass.

    Raises:
        AssertionError: if any check fails. It is raised explicitly rather
            than through ``assert`` so validation is not stripped when Python
            runs with ``-O``.
    """
    required = ["schema", "exam", "year", "shift", "subjects", "total", "questions"]
    # Report the first missing field, in the order listed above.
    for field in required:
        if field not in data:
            raise AssertionError(f"Missing field: {field}")
    if data["schema"] != "v4":
        raise AssertionError(f"Unexpected schema: {data['schema']}")
    if len(data["questions"]) != data["total"]:
        raise AssertionError("Question count mismatch")
    print("Response valid ✓")

Rate Limiting Best Practices

import time

def fetch_with_retry(url, headers, max_retries=3, timeout=30):
    """GET ``url`` and return its JSON body, retrying when rate limited.

    Args:
        url: Address to fetch.
        headers: Request headers (e.g. the Authorization header).
        max_retries: Maximum number of requests to send.
        timeout: Seconds to wait on the server before giving up on a request,
            so a stalled connection cannot hang the caller forever.

    Returns:
        The parsed JSON body of the first 200 response.

    Raises:
        requests.HTTPError: on a 4xx/5xx response other than 429.
        Exception: when no attempt produced a 200 response.
    """
    for attempt in range(max_retries):
        resp = requests.get(url, headers=headers, timeout=timeout)
        if resp.status_code == 200:
            return resp.json()
        if resp.status_code != 429:
            # Other error statuses are not retried; they raise immediately.
            resp.raise_for_status()
            continue
        if attempt == max_retries - 1:
            # No retry follows the last attempt, so waiting would be wasted.
            break
        # Prefer the server's Retry-After hint (in seconds) when it sends one;
        # otherwise back off exponentially, capped at 30 seconds.
        retry_after = resp.headers.get("Retry-After", "")
        wait = int(retry_after) if retry_after.isdecimal() else min(2 ** attempt, 30)
        print(f"Rate limited. Waiting {wait}s...")
        time.sleep(wait)
    raise Exception("Max retries exceeded")

On this page