Skip to the content.

QueryBuilder API Reference

The QueryBuilder class provides a fluent API for constructing complex Europe PMC search queries with type safety and validation.

Class Overview

from pyeuropepmc import QueryBuilder

qb = QueryBuilder(validate=False)  # Optional validation

Constructor

QueryBuilder(validate: bool = False) -> QueryBuilder

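Parameters:

validate (bool, default False): Whether to enable validation. Validation is optional and disabled by default.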

Core Methods

keyword()

Add a keyword search term.

keyword(term: str, field: FieldType | None = None) -> QueryBuilder

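Parameters:

term (str): The keyword search term to add.
field (FieldType | None, default None): Optional field to restrict the term to, e.g. "title".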

Returns: QueryBuilder (for method chaining)

Example:

qb.keyword("cancer").keyword("therapy", field="title")

field()

Generic field search with optional value transformation.

field(
    field_name: FieldType,
    value: str | int | bool,
    escape: bool = True,
    transform: Callable[[str | int | bool], str | int | bool] | None = None
) -> QueryBuilder

Parameters:

Returns: QueryBuilder (for method chaining)

Examples:

# Basic field search
qb.field("author", "Smith J")

# With transformation
qb.field("pmcid", "1234567", transform=lambda x: f"PMC{x}" if not str(x).startswith("PMC") else str(x))

date_range()

Add publication date constraints.

date_range(
    start_year: int | None = None,
    end_year: int | None = None,
    start_date: str | None = None,
    end_date: str | None = None
) -> QueryBuilder

Parameters:

Returns: QueryBuilder (for method chaining)

Examples:

# Year range
qb.date_range(start_year=2020, end_year=2023)

# Date range
qb.date_range(start_date="2020-01-01", end_date="2023-12-31")

citation_count()

Add citation count filters.

citation_count(min_count: int | None = None, max_count: int | None = None) -> QueryBuilder

Parameters:

Returns: QueryBuilder (for method chaining)

Examples:

# Papers with at least 10 citations
qb.citation_count(min_count=10)

# Papers with 5-50 citations
qb.citation_count(min_count=5, max_count=50)

pmcid()

Search by PMC ID.

pmcid(pmcid: str) -> QueryBuilder

Parameters:

pmcid (str): The PMC ID, with or without the "PMC" prefix (e.g. "PMC1234567" or "1234567").

Returns: QueryBuilder (for method chaining)

Example:

qb.pmcid("PMC1234567")  # Also accepts "1234567"

source()

Search by data source.

source(source: str) -> QueryBuilder

Parameters:

Returns: QueryBuilder (for method chaining)

Example:

qb.source("MED")

accession_type()

Search by accession type.

accession_type(accession_type: str) -> QueryBuilder

Parameters:

accession_type (str): The accession type, e.g. "pdb". The value is automatically lowercased.

Returns: QueryBuilder (for method chaining)

Example:

qb.accession_type("pdb")  # Automatically lowercased

cites()

Search for papers that cite a specific article.

cites(article_id: str, source: str = "med") -> QueryBuilder

Parameters:

Returns: QueryBuilder (for method chaining)

Example:

qb.cites("8521067", source="med")

Logical Operators

and_()

Add AND operator between query parts.

and_() -> QueryBuilder

Returns: QueryBuilder (for method chaining)

Example:

qb.keyword("cancer").and_().keyword("therapy")

or_()

Add OR operator between query parts.

or_() -> QueryBuilder

Returns: QueryBuilder (for method chaining)

Example:

qb.keyword("cancer").or_().keyword("tumor")

not_()

Add NOT operator before the next query part.

not_() -> QueryBuilder

Returns: QueryBuilder (for method chaining)

Example:

qb.keyword("cancer").and_().not_().keyword("review")

Advanced Methods

group()

Add a grouped sub-query.

group(builder: QueryBuilder) -> QueryBuilder

Parameters:

Returns: QueryBuilder (for method chaining)

Example:

sub_query = QueryBuilder().keyword("cancer").or_().keyword("tumor")
qb.group(sub_query).and_().keyword("therapy")

raw()

Add a raw query string.

raw(query_string: str) -> QueryBuilder

Parameters:

Returns: QueryBuilder (for method chaining)

Example:

qb.raw("(cancer OR tumor) AND therapy")

Build & Validation

build()

Build and return the final query string.

build(validate: bool = True) -> str

Parameters:

Returns: str - The constructed query string

Example:

query = qb.keyword("cancer").and_().keyword("therapy").build()
print(query)  # "cancer AND therapy"

Persistence Methods

save()

Save the query to a JSON file in the standard format.

save(
    file_path: str,
    platform: str = "pubmed",
    authors: list[dict[str, str]] | None = None,
    record_info: dict[str, Any] | None = None,
    date_info: dict[str, str] | None = None,
    database: list[str] | None = None,
    include_generic: bool = False
) -> None

Parameters:

Class Methods

from_string()

Load a query from a string.

@classmethod
from_string(
    query_string: str,
    platform: str = "pubmed",
    validate: bool = False
) -> QueryBuilder

Parameters:

Returns: QueryBuilder instance

from_file()

Load a query from a JSON file.

@classmethod
from_file(file_path: str, validate: bool = False) -> QueryBuilder

Parameters:

Returns: QueryBuilder instance

Translation & Evaluation

translate()

Translate query to another platform’s syntax.

translate(target_platform: str) -> str

Parameters:

Returns: str - Query in target platform syntax

Example:

qb = QueryBuilder.from_string('("cancer"[Title])', platform="pubmed")
wos_query = qb.translate("wos")  # TI="cancer"

to_query_object()

Convert to a search-query Query object.

to_query_object(platform: str = "pubmed") -> Any

Parameters:

Returns: Query object from search-query package

evaluate()

Evaluate search effectiveness against records.

evaluate(records: dict[str, dict[str, str]], platform: str = "pubmed") -> dict[str, float]

Parameters:

Returns: dict with the keys 'recall', 'precision', and 'f1_score'

Systematic Review Integration

log_to_search()

Log the query to a SearchLog for systematic review tracking.

log_to_search(
    search_log: Any,
    database: str = "Europe PMC",
    filters: dict[str, Any] | None = None,
    results_returned: int | None = None,
    notes: str | None = None,
    raw_results: Any = None,
    raw_results_dir: str | None = None,
    platform: str | None = None,
    export_path: str | None = None
) -> None

Parameters:

Field Types

The FieldType literal type covers 150+ searchable fields, including:

Core Fields: title, abstract, author, journal, doi, pmid, pmcid

Date Fields: pub_year, first_pdate, e_pdate, update_date

Author Fields: affiliation, authorid, auth_first, auth_last

Content Fields: keyword, mesh, chemical, disease, gene_protein

Citation Fields: citation_count, cites, cited, reffed_by

Full-Text Fields: has_pdf, has_fulltext, open_access, in_pmc

And many more… See Field Metadata for the complete list.

Error Handling

QueryBuilder uses specific error codes:

Examples

Basic Query Building

from pyeuropepmc import QueryBuilder

qb = QueryBuilder()

# Simple keyword search
query1 = qb.keyword("machine learning").build()
# "machine learning"

# Field-specific search
query2 = qb.field("author", "Smith J").build()
# "AUTH:Smith J"

# Complex query with operators
query3 = (qb
    .keyword("cancer", field="title")
    .and_()
    .keyword("therapy")
    .and_()
    .date_range(start_year=2020)
    .build())
# "(TITLE:cancer) AND therapy AND (PUB_YEAR:[2020 TO *])"

Advanced Query Patterns

# Citation-based filtering
query = (qb
    .keyword("CRISPR")
    .and_()
    .citation_count(min_count=50)
    .build())

# Multi-field search with OR logic
query = (qb
    .field("title", "machine learning")
    .or_()
    .field("abstract", "machine learning")
    .and_()
    .field("pub_year", 2023)
    .build())

# Complex nested query
sub_query = QueryBuilder().keyword("cancer").or_().keyword("tumor")
main_query = (qb
    .group(sub_query)
    .and_()
    .field("journal", "Nature")
    .build())

Systematic Review Workflow

from pyeuropepmc.utils.search_logging import start_search

# Start systematic review log
log = start_search("CRISPR Review", executed_by="Researcher Name")

# Build and execute query
qb = QueryBuilder()
query = (qb
    .keyword("CRISPR")
    .and_()
    .keyword("gene editing")
    .and_()
    .date_range(start_year=2018)
    .build())

# Log the search
qb.log_to_search(
    search_log=log,
    filters={"date_range": "2018+", "keywords": ["CRISPR", "gene editing"]},
    results_returned=150,
    notes="Initial broad search for CRISPR literature"
)

# Save the log
log.save("systematic_review_searches.json")

Query Persistence

# Save query
qb.save("my_search.json",
        authors=[{"name": "John Doe", "ORCID": "0000-0000-0000-0001"}],
        date_info={"data_entry": "2025-11-07", "search_conducted": "2025-11-07"})

# Load query
loaded_qb = QueryBuilder.from_file("my_search.json")
translated = loaded_qb.translate("wos")  # Translate to Web of Science syntax

See Also:

docs/api/query-builder.md