Reference Data Endpoints
Overview
MSPO provides several reference data endpoints for looking up codes and mappings. These are small, non-paginated datasets that rarely change.
Endpoints Summary
| Endpoint | Records | Description |
|---|---|---|
| `references/states` | 16 | Malaysian states |
| `references/countries` | ~200 | Country codes |
| `spoc/zones` | 11 | MSPO zones |
| `spoc/zone-state` | 16 | Zone-state mapping |
| `spoc/spocs` | 162 | SPOC codes |
| `spoc/daerah` | ~150 | Districts by state |
| `references/cb` | ~30 | Certification bodies |
| `references/audit-scopes` | ~10 | Audit scope types |
| `references/audit-types` | ~5 | Audit types |
| `references/entity-types` | ~10 | Entity type codes |
| `references/groupmanagers` | ~50 | Group manager orgs |
States
Endpoint
GET https://api.mspots.org.my/api/references/states
Response
{
"data": [
{ "stateId": 1, "state": "Johor" },
{ "stateId": 2, "state": "Kedah" },
{ "stateId": 3, "state": "Kelantan" },
{ "stateId": 4, "state": "Melaka" },
{ "stateId": 5, "state": "Negeri Sembilan" },
{ "stateId": 6, "state": "Pahang" },
{ "stateId": 7, "state": "Perak" },
{ "stateId": 8, "state": "Perlis" },
{ "stateId": 9, "state": "Pulau Pinang" },
{ "stateId": 10, "state": "Sabah" },
{ "stateId": 11, "state": "Sarawak" },
{ "stateId": 12, "state": "Selangor" },
{ "stateId": 13, "state": "Terengganu" },
{ "stateId": 14, "state": "Kuala Lumpur" },
{ "stateId": 15, "state": "Labuan" },
{ "stateId": 16, "state": "Putrajaya" }
]
}
Zones
Endpoint
GET https://api.mspots.org.my/api/spoc/zones?
Note: The trailing `?` is required.
Response
{
"data": [
{ "zoneId": 1, "zone": "Zon Utara 1" },
{ "zoneId": 2, "zone": "Zon Utara 2" },
{ "zoneId": 3, "zone": "Zon Tengah 1" },
{ "zoneId": 4, "zone": "Zon Tengah 2" },
{ "zoneId": 5, "zone": "Zon Selatan 1" },
{ "zoneId": 6, "zone": "Zon Selatan 2" },
{ "zoneId": 7, "zone": "Zon Timur 1" },
{ "zoneId": 8, "zone": "Zon Timur 2" },
{ "zoneId": 9, "zone": "Zon Sabah 1" },
{ "zoneId": 10, "zone": "Zon Sabah 2" },
{ "zoneId": 11, "zone": "Zon Sarawak" }
]
}
Zone-State Mapping
Endpoint
GET https://api.mspots.org.my/api/spoc/zone-state?
Response
{
"data": [
{ "zoneId": 1, "stateId": 8, "state": "Perlis" },
{ "zoneId": 1, "stateId": 2, "state": "Kedah" },
{ "zoneId": 2, "stateId": 9, "state": "Pulau Pinang" },
{ "zoneId": 2, "stateId": 7, "state": "Perak" }
]
}
SPOC Codes
Endpoint
GET https://api.mspots.org.my/api/spoc/spocs?
Response
{
"data": [
{ "spocId": 5, "zoneId": 1, "spocCode": "A1" },
{ "spocId": 6, "zoneId": 1, "spocCode": "A2" },
{ "spocId": 28, "zoneId": 2, "spocCode": "B1" }
]
}
SPOC Code Pattern
| Zone | Prefix | Examples |
|---|---|---|
| 1 | A | A1-A23 |
| 2 | B, N, M | B1-B11 |
| 3 | C | C1-C9 |
| 4-5 | J | J1-J43 |
| 6-7 | S | S1-S29 |
| 8-9 | Q | Q1-Q32 |
| 10 | K, P | K1-K3, P1 |
| 11 | T, D | T1-T3, D1-D2 |
Certification Bodies
Endpoint
GET https://api.mspots.org.my/api/references/cb
Response
{
"data": [
{
"cbId": 1,
"cbCode": "CB001",
"cbName": "SIRIM QAS International Sdn Bhd"
},
{
"cbId": 2,
"cbCode": "CB002",
"cbName": "SGS (Malaysia) Sdn Bhd"
}
]
}
GCS Storage
gs://calee_data/raw/mspo/api/
├── references/
│ ├── states/YYYYMMDD/states.parquet
│ ├── countries/YYYYMMDD/countries.parquet
│ └── cb/YYYYMMDD/cb.parquet
└── spoc/
├── zones/YYYYMMDD/zones.parquet
├── zone-state/YYYYMMDD/zone-state.parquet
└── spocs/YYYYMMDD/spocs.parquet
Example Code
Python - Fetch All Reference Data
import requests
import polars as pl
from datetime import datetime
# Endpoint paths (relative to the API base URL) of every reference dataset
# listed in the Endpoints Summary table.
REFERENCE_ENDPOINTS = [
    "references/states",
    "references/countries",
    "references/cb",
    "references/audit-scopes",
    "references/audit-types",
    "references/entity-types",
    # Listed in the Endpoints Summary table but previously missing here.
    "references/groupmanagers",
    "spoc/zones",
    "spoc/zone-state",
    "spoc/spocs",
    "spoc/daerah",
]
def fetch_reference_data(endpoint: str, *, timeout: float = 30.0) -> list:
    """Fetch data from a reference endpoint.

    Args:
        endpoint: Path relative to the API base URL, e.g.
            "references/states" or "spoc/zones".
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the caller forever.

    Returns:
        The value stored under the "data" key when the JSON body is an
        object that has one; otherwise the decoded JSON body unchanged.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    base_url = "https://api.mspots.org.my/api"

    # Endpoints under spoc/ are requested with a trailing "?".
    suffix = "?" if endpoint.startswith("spoc/") else ""
    url = f"{base_url}/{endpoint}{suffix}"

    # NOTE(review): verify=False turns off TLS certificate verification.
    # Kept as in the original call; confirm whether the server certificate
    # can be validated and remove it if so.
    response = requests.get(url, verify=False, timeout=timeout)
    response.raise_for_status()

    payload = response.json()
    # A bare JSON array has no .get(); only unwrap object-shaped bodies.
    if isinstance(payload, dict):
        return payload.get("data", payload)
    return payload
def save_reference_to_gcs(data: list, endpoint: str):
    """Write reference records to the calee_data bucket as one Parquet file.

    The object is stored at
    raw/mspo/api/<endpoint>/<YYYYMMDD>/<name>.parquet, where <name> is the
    last path segment of ``endpoint`` and the date comes from
    ``datetime.now()``.

    Args:
        data: Records to load into a polars DataFrame.
        endpoint: Endpoint path the records came from, e.g. "spoc/zones".
    """
    import io
    from google.cloud import storage

    frame = pl.DataFrame(data)

    stamp = datetime.now().strftime("%Y%m%d")
    leaf = endpoint.split("/")[-1]
    path = f"raw/mspo/api/{endpoint}/{stamp}/{leaf}.parquet"

    # Serialize in memory, then rewind so the upload starts at byte 0.
    sink = io.BytesIO()
    frame.write_parquet(sink, compression="zstd")
    sink.seek(0)

    target = storage.Client().bucket("calee_data").blob(path)
    target.upload_from_file(sink)

    print(f"Saved {len(frame)} records to gs://calee_data/{path}")
# Fetch and save all reference data.
# Best-effort: a failure on one endpoint is reported and the loop moves on,
# so the remaining endpoints are still processed.
for endpoint in REFERENCE_ENDPOINTS:
    try:
        data = fetch_reference_data(endpoint)
    except Exception as e:
        print(f"Error fetching {endpoint}: {e}")
        continue

    # Handled separately so an upload failure is not reported as a fetch error.
    try:
        save_reference_to_gcs(data, endpoint)
    except Exception as e:
        print(f"Error saving {endpoint}: {e}")
DuckDB - Query Reference Data
-- Join smallholders with zone reference
-- Counts smallholder rows and sums certified_area per (state, zone),
-- keeping only rows where deleted = false, largest total first.
-- NOTE(review): both '*/*.parquet' globs match every YYYYMMDD snapshot
-- folder. If more than one snapshot exists, each input presumably holds
-- repeated rows and COUNT/SUM will be inflated; consider pinning both
-- paths to a single date. Confirm against the bucket contents.
SELECT
s.state,
z.zone,
COUNT(*) as smallholders,
SUM(s.certified_area) as total_ha
FROM read_parquet('gs://calee_data/raw/mspo/api/spoc/get-spoc-list-public/*/*.parquet') s
JOIN read_parquet('gs://calee_data/raw/mspo/api/spoc/zones/*/*.parquet') z
ON s.zone_id = z.zoneId
WHERE s.deleted = false
GROUP BY s.state, z.zone
ORDER BY total_ha DESC;
Related
- SPOC Smallholders (Public) - Main smallholder data