{
  "_id": "6a101967acfb0bcc41c85f26",
  "Package": "blocking",
  "Type": "Package",
  "Title": "Various Blocking Methods for Entity Resolution",
  "Version": "1.0.3",
  "Authors@R": "c(person(given = \"Maciej\",\nfamily = \"Beręsewicz\",\nrole = c(\"aut\", \"cre\"),\nemail = \"maciej.beresewicz@ue.poznan.pl\",\ncomment = c(ORCID = \"0000-0002-8281-4301\")),\nperson(given = \"Adam\",\nfamily = \"Struzik\",\nrole = c(\"aut\", \"ctr\"),\nemail = \"adastr5@st.amu.edu.pl\"))",
  "Description": "The goal of 'blocking' is to provide blocking methods for\nrecord linkage and deduplication using approximate nearest\nneighbour (ANN) algorithms and graph techniques. It supports\nmultiple ANN implementations via 'rnndescent', 'RcppHNSW',\n'RcppAnnoy', and 'mlpack' packages, and provides integration\nwith the 'reclin2' package. The package generates shingles from\ncharacter strings and similarity vectors for record comparison,\nand includes evaluation metrics for assessing blocking\nperformance including false positive rate (FPR) and false\nnegative rate (FNR) estimates. For details see: Papadakis et\nal. (2020) <doi:10.1145/3377455>, Steorts et al. (2014)\n<doi:10.1007/978-3-319-11257-2_20>, Dasylva and Goussanou\n(2021)\n<https://www150.statcan.gc.ca/n1/en/catalogue/12-001-X202100200002>,\nDasylva and Goussanou (2022) <doi:10.1007/s42081-022-00153-3>.",
  "License": "GPL-3",
  "Encoding": "UTF-8",
  "LazyData": "true",
  "URL": "https://github.com/ncn-foreigners/blocking,\nhttps://ncn-foreigners.ue.poznan.pl/blocking/",
  "BugReports": "https://github.com/ncn-foreigners/blocking/issues",
  "Roxygen": "list(markdown = TRUE)",
  "VignetteBuilder": "knitr",
  "Config/roxygen2/version": "8.0.0",
  "Config/pak/sysreqs": "libglpk-dev libicu-dev libxml2-dev libx11-dev",
  "Repository": "https://ncn-foreigners.r-universe.dev",
  "Date/Publication": "2026-05-14 15:47:34 UTC",
  "RemoteUrl": "https://github.com/ncn-foreigners/blocking",
  "RemoteRef": "HEAD",
  "RemoteSha": "f06c0737b945bd0d4df56914a2b3fb8f4a97e73a",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-05-14 18:00:25 UTC",
    "User": "root"
  },
  "Author": "Maciej Beręsewicz [aut, cre] (ORCID:\n<https://orcid.org/0000-0002-8281-4301>),\nAdam Struzik [aut, ctr]",
  "Maintainer": "Maciej Beręsewicz <maciej.beresewicz@ue.poznan.pl>",
  "MD5sum": "196554c3a35fdd63157ebf9c0dad6668",
  "_user": "ncn-foreigners",
  "_type": "src",
  "_file": "blocking_1.0.3.tar.gz",
  "_fileid": "4466d3eb01d45b6495bd30f7b314b9ac8436f56640611ce0965bf7526b212666",
  "_filesize": 2044756,
  "_sha256": "4466d3eb01d45b6495bd30f7b314b9ac8436f56640611ce0965bf7526b212666",
  "_created": "2026-05-14T18:00:25.000Z",
  "_published": "2026-05-22T08:52:55.875Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 77347255024,
      "time": 255,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7001244252"
    },
    {
      "job": 77347255173,
      "time": 216,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7001232218"
    },
    {
      "job": 77347255283,
      "time": 218,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7001232359"
    },
    {
      "job": 77347255247,
      "time": 190,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7001223315"
    },
    {
      "job": 77347254845,
      "time": 359,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7001157197"
    },
    {
      "job": 77347254747,
      "time": 146,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7156591586"
    },
    {
      "job": 77347254972,
      "time": 160,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7001214054"
    },
    {
      "job": 77347255169,
      "time": 216,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7001232981"
    },
    {
      "job": 77347255081,
      "time": 177,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7001220798"
    }
  ],
  "_buildurl": "https://github.com/r-universe/ncn-foreigners/actions/runs/25876212529",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/ncn-foreigners/blocking",
  "_commit": {
    "id": "f06c0737b945bd0d4df56914a2b3fb8f4a97e73a",
    "author": "ASuGuit <adamstruzikguit@gmail.com>",
    "committer": "ASuGuit <adamstruzikguit@gmail.com>",
    "message": "fix errors in the evaluation functions\n",
    "time": 1778773654
  },
  "_maintainer": {
    "name": "Maciej Beręsewicz",
    "email": "maciej.beresewicz@ue.poznan.pl",
    "login": "berenz",
    "orcid": "0000-0002-8281-4301",
    "uuid": 3464669
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 4.1.0",
      "role": "Depends"
    },
    {
      "package": "text2vec",
      "role": "Imports"
    },
    {
      "package": "tokenizers",
      "role": "Imports"
    },
    {
      "package": "RcppHNSW",
      "role": "Imports"
    },
    {
      "package": "RcppAnnoy",
      "role": "Imports"
    },
    {
      "package": "mlpack",
      "role": "Imports"
    },
    {
      "package": "rnndescent",
      "role": "Imports"
    },
    {
      "package": "igraph",
      "role": "Imports"
    },
    {
      "package": "data.table",
      "role": "Imports"
    },
    {
      "package": "methods",
      "role": "Imports"
    },
    {
      "package": "readr",
      "role": "Imports"
    },
    {
      "package": "utils",
      "role": "Imports"
    },
    {
      "package": "Matrix",
      "role": "Imports"
    },
    {
      "package": "tinytest",
      "role": "Suggests"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "reclin2",
      "role": "Suggests"
    }
  ],
  "_owner": "ncn-foreigners",
  "_selfowned": true,
  "_usedby": 1,
  "_updates": [
    {
      "week": "2025-22",
      "n": 3
    },
    {
      "week": "2025-23",
      "n": 4
    },
    {
      "week": "2025-24",
      "n": 4
    },
    {
      "week": "2025-25",
      "n": 3
    },
    {
      "week": "2025-52",
      "n": 1
    },
    {
      "week": "2026-05",
      "n": 1
    },
    {
      "week": "2026-06",
      "n": 2
    },
    {
      "week": "2026-11",
      "n": 2
    },
    {
      "week": "2026-20",
      "n": 1
    }
  ],
  "_tags": [
    {
      "name": "1.0.2",
      "date": "2026-03-10"
    }
  ],
  "_topics": [
    "annoy",
    "approximate-nearest-neighbor-search",
    "deduplication",
    "entity-resolution",
    "hnsw",
    "igraph",
    "record-linkage"
  ],
  "_stars": 14,
  "_contributors": [
    {
      "user": "asuguit",
      "count": 69,
      "uuid": 203806001
    },
    {
      "user": "berenz",
      "count": 58,
      "uuid": 3464669
    }
  ],
  "_userbio": {
    "uuid": 87754555,
    "type": "organization",
    "name": "Project \"Towards census-like statistics for foreign-born populations\"",
    "description": "NCN OPUS 20 grant no. 2020/39/B/HS4/00941"
  },
  "_downloads": {
    "count": 564,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/blocking"
  },
  "_devurl": "https://github.com/ncn-foreigners/blocking",
  "_pkgdown": "https://ncn-foreigners.ue.poznan.pl/blocking/",
  "_searchresults": 17,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/blocking.html",
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/ncn-foreigners/blocking",
  "_realowner": "ncn-foreigners",
  "_cranurl": true,
  "_releases": [
    {
      "version": "1.0.0",
      "date": "2025-06-13"
    },
    {
      "version": "1.0.1",
      "date": "2025-06-18"
    },
    {
      "version": "1.0.2",
      "date": "2026-03-11"
    }
  ],
  "_exports": [
    "blocking",
    "control_annoy",
    "control_hnsw",
    "control_kd",
    "control_lsh",
    "control_nnd",
    "controls_ann",
    "controls_txt",
    "est_block_error",
    "pair_ann"
  ],
  "_datasets": [
    {
      "name": "census",
      "title": "Fictional census data",
      "object": "census",
      "class": [
        "data.frame"
      ],
      "fields": [
        "person_id",
        "pername1",
        "pername2",
        "sex",
        "dob_day",
        "dob_mon",
        "dob_year",
        "hse_num",
        "enumcap",
        "enumpc",
        "str_nam",
        "cap_add",
        "census_id"
      ],
      "rows": 25343,
      "table": true,
      "tojson": true
    },
    {
      "name": "cis",
      "title": "Fictional customer data",
      "object": "cis",
      "class": [
        "data.frame"
      ],
      "fields": [
        "person_id",
        "pername1",
        "pername2",
        "sex",
        "dob_day",
        "dob_mon",
        "dob_year",
        "enumcap",
        "enumpc",
        "cis_id"
      ],
      "rows": 24613,
      "table": true,
      "tojson": true
    },
    {
      "name": "foreigners",
      "title": "Fictional 2024 population of foreigners in Poland",
      "object": "foreigners",
      "class": [
        "data.frame"
      ],
      "fields": [
        "fname",
        "sname",
        "surname",
        "date",
        "region",
        "country",
        "true_id"
      ],
      "rows": 110000,
      "table": true,
      "tojson": true
    },
    {
      "name": "RLdata500",
      "title": "RLdata500 dataset from the RecordLinkage package",
      "object": "RLdata500",
      "class": [
        "data.frame"
      ],
      "fields": [
        "fname_c1",
        "fname_c2",
        "lname_c1",
        "lname_c2",
        "by",
        "bm",
        "bd",
        "rec_id",
        "ent_id"
      ],
      "rows": 500,
      "table": true,
      "tojson": true
    }
  ],
  "_help": [
    {
      "page": "blocking",
      "title": "Block records based on character vectors",
      "topics": [
        "blocking"
      ]
    },
    {
      "page": "census",
      "title": "Fictional census data",
      "topics": [
        "census"
      ]
    },
    {
      "page": "cis",
      "title": "Fictional customer data",
      "topics": [
        "cis"
      ]
    },
    {
      "page": "control_annoy",
      "title": "Controls for the Annoy algorithm",
      "topics": [
        "control_annoy"
      ]
    },
    {
      "page": "control_hnsw",
      "title": "Controls for the HNSW algorithm",
      "topics": [
        "control_hnsw"
      ]
    },
    {
      "page": "control_kd",
      "title": "Controls for the k-d tree algorithm",
      "topics": [
        "control_kd"
      ]
    },
    {
      "page": "control_lsh",
      "title": "Controls for the LSH algorithm",
      "topics": [
        "control_lsh"
      ]
    },
    {
      "page": "control_nnd",
      "title": "Controls for the NND algorithm",
      "topics": [
        "control_nnd"
      ]
    },
    {
      "page": "controls_ann",
      "title": "Controls for approximate nearest neighbours algorithms",
      "topics": [
        "controls_ann"
      ]
    },
    {
      "page": "controls_txt",
      "title": "Controls for processing character data",
      "topics": [
        "controls_txt"
      ]
    },
    {
      "page": "est_block_error",
      "title": "Estimate errors due to blocking in record linkage",
      "topics": [
        "est_block_error"
      ]
    },
    {
      "page": "foreigners",
      "title": "Fictional 2024 population of foreigners in Poland",
      "topics": [
        "foreigners"
      ]
    },
    {
      "page": "pair_ann",
      "title": "Integration with the reclin2 package",
      "topics": [
        "pair_ann"
      ]
    },
    {
      "page": "RLdata500",
      "title": "RLdata500 dataset from the RecordLinkage package",
      "topics": [
        "RLdata500"
      ]
    }
  ],
  "_readme": "https://github.com/ncn-foreigners/blocking/raw/HEAD/README.md",
  "_rundeps": [
    "BH",
    "bit",
    "bit64",
    "cli",
    "clipr",
    "cpp11",
    "crayon",
    "data.table",
    "digest",
    "dqrng",
    "float",
    "glue",
    "hms",
    "igraph",
    "lattice",
    "lgr",
    "lifecycle",
    "magrittr",
    "Matrix",
    "MatrixExtra",
    "mlapi",
    "mlpack",
    "pillar",
    "pkgconfig",
    "prettyunits",
    "progress",
    "R6",
    "Rcpp",
    "RcppAnnoy",
    "RcppArmadillo",
    "RcppEnsmallen",
    "RcppHNSW",
    "readr",
    "RhpcBLASctl",
    "rlang",
    "rnndescent",
    "rsparse",
    "sitmo",
    "SnowballC",
    "stringi",
    "text2vec",
    "tibble",
    "tidyselect",
    "tokenizers",
    "tzdb",
    "utf8",
    "vctrs",
    "vroom",
    "withr"
  ],
  "_vignettes": [
    {
      "source": "v1-deduplication.Rmd",
      "filename": "v1-deduplication.html",
      "title": "Blocking records for deduplication",
      "author": "Maciej Beręsewicz",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Setup",
        "Blocking for deduplication"
      ],
      "created": "2023-11-05 14:41:18",
      "modified": "2026-02-08 10:24:24",
      "commits": 16
    },
    {
      "source": "v2-reclin.Rmd",
      "filename": "v2-reclin.html",
      "title": "Blocking records for record linkage",
      "author": "Maciej Beręsewicz",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Setup",
        "Data",
        "Linking datasets",
        "Using basic functionalities of blocking package",
        "Assessing the quality",
        "Compare results"
      ],
      "created": "2023-11-05 14:41:18",
      "modified": "2025-12-27 08:28:54",
      "commits": 15
    },
    {
      "source": "v3-integration.Rmd",
      "filename": "v3-integration.html",
      "title": "Integration with existing packages",
      "author": "Maciej Beręsewicz",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Setup",
        "Data",
        "Integration with the reclin2 package",
        "Usage with fastLink package",
        "Usage with RecordLinkage package"
      ],
      "created": "2025-05-31 08:32:54",
      "modified": "2025-12-27 08:28:54",
      "commits": 3
    }
  ],
  "_score": 6.984031980271181,
  "_indexed": true,
  "_nocasepkg": "blocking",
  "_universes": [
    "ncn-foreigners",
    "berenz"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "1.0.3",
      "date": "2026-05-14T18:03:39.000Z",
      "distro": "noble",
      "commit": "f06c0737b945bd0d4df56914a2b3fb8f4a97e73a",
      "fileid": "ac1ccb0a5607851d27206d0321a4c2d56b9a0c49c0f68d2e971fa4ca1ff7f734",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ncn-foreigners/actions/runs/25876212529"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "1.0.3",
      "date": "2026-05-14T18:03:07.000Z",
      "distro": "noble",
      "commit": "f06c0737b945bd0d4df56914a2b3fb8f4a97e73a",
      "fileid": "30019b1dcb7b620b99210b68a5d389719aa3da91047c26ec6969b2ac7331c3b5",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ncn-foreigners/actions/runs/25876212529"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "1.0.3",
      "date": "2026-05-14T18:02:34.000Z",
      "commit": "f06c0737b945bd0d4df56914a2b3fb8f4a97e73a",
      "fileid": "85b29d87108f52c2c3e10127148d0575262a51724051730aad74a3e6df83bcce",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ncn-foreigners/actions/runs/25876212529"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "1.0.3",
      "date": "2026-05-14T18:02:28.000Z",
      "commit": "f06c0737b945bd0d4df56914a2b3fb8f4a97e73a",
      "fileid": "c006d9ac93c9ce34d4e227adbc48340f0229bc996a4b9cba9923f4dfade135bd",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ncn-foreigners/actions/runs/25876212529"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "1.0.3",
      "date": "2026-05-14T18:02:21.000Z",
      "commit": "f06c0737b945bd0d4df56914a2b3fb8f4a97e73a",
      "fileid": "70a089b983ae0a0738e4b3d87eec3320d6bc2a661e9647be9d4d675d35f2eb38",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ncn-foreigners/actions/runs/25876212529"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "1.0.3",
      "date": "2026-05-14T18:03:24.000Z",
      "commit": "f06c0737b945bd0d4df56914a2b3fb8f4a97e73a",
      "fileid": "2bdb4268d919cbc1b090cca34892285680687daeb356fc3385f1338e52326556",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ncn-foreigners/actions/runs/25876212529"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "1.0.3",
      "date": "2026-05-14T18:02:37.000Z",
      "commit": "f06c0737b945bd0d4df56914a2b3fb8f4a97e73a",
      "fileid": "4e12737b1980a2e153c95e27543fc5429c2d10cbea7bb35dad853c4676ccac74",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ncn-foreigners/actions/runs/25876212529"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "1.0.3",
      "date": "2026-05-22T08:52:37.000Z",
      "commit": "f06c0737b945bd0d4df56914a2b3fb8f4a97e73a",
      "fileid": "fc6dd158e0853906133e8752d4e61e4dc48a388bfbec8d3cef13089e2fdb1f6c",
      "status": "success",
      "buildurl": "https://github.com/r-universe/ncn-foreigners/actions/runs/25876212529"
    }
  ]
}