A Rust library for validating CSV record data based on rules defined directly in your structs using the #[derive(ValidateCsv)] macro.
Add the following to your Cargo.toml:
[dependencies]
csv-schema-validator = "0.1.2"
serde = { version = "1.0", features = ["derive"] }
csv = "1.3"
regex = "1.11"
once_cell = "1.21"
use serde::Deserialize;
use csv::Reader;
use csv_schema_validator::{ValidateCsv, ValidationError};
// Define your struct with validation annotations.
// Each `#[validate(...)]` attribute declares the rules that the derived
// `validate_csv` method checks for the field it is attached to.
#[derive(Deserialize, ValidateCsv, Debug)]
struct TestRecord {
    // Must lie between 0.0 and 100.0 (inclusive).
    #[validate(range(min = 0.0, max = 100.0))]
    grade: f64,
    // Must match the pattern: three uppercase letters followed by four digits.
    #[validate(regex = r"^[A-Z]{3}\d{4}$")]
    code: String,
    // Must be present, 10 to 50 long, and not blank.
    #[validate(required, length(min = 10, max = 50), not_blank)]
    name: Option<String>,
    // Checked by the custom validator function named in the attribute; the name
    // must match the function defined in this example.
    #[validate(custom = "length_validator")]
    comments: String,
    // Must be present and equal to one of the listed values.
    #[validate(required, one_of("short", "medium", "long"))]
    more_comments: Option<String>,
    // Must be present and must not equal any of the listed values.
    #[validate(required, not_in("forbidden", "banned"))]
    tag: Option<String>,
}
/// Custom validator: comments must be at most 50 characters.
///
/// Returns `Ok(())` when `s` holds at most 50 characters and
/// `Err("Comments too long")` otherwise.
fn length_validator(s: &str) -> Result<(), String> {
    // Count characters rather than bytes: `str::len` reports the UTF-8 byte
    // length, which would reject short comments containing non-ASCII text.
    if s.chars().count() <= 50 {
        Ok(())
    } else {
        Err("Comments too long".into())
    }
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut reader = Reader::from_path("data.csv")?;
for result in reader.deserialize() {
let rec: Record = result?;
rec.validate_csv()?;
println!("Record valid: {:?}", rec);
}
Ok(())
}
#[validate(range(min = 0.0, max = 100.0))]
grade: f64,
Ensures that grade is between 0.0 and 100.0 (inclusive).
#[validate(regex = r"^[A-Z]{3}\d{4}$")]
code: String,
Validates the field against a regular expression.
#[validate(required)]
name: Option<String>,
Ensures that the Option is not None.
#[validate(custom = "path::to::func")]
comments: String,
Calls your custom function fn(&T) -> Result<(), String>
for additional checks.
#[validate(required, length(min = 10, max = 50))]
name: Option<String>,
Rejects string fields that are blank or contain only whitespace:
#[validate(required, length(min = 10, max = 50), not_blank)]
name: Option<String>,
Checks if the string has one of the allowed values:
#[validate(required, one_of("short", "medium", "long"))]
more_comments: Option<String>,
Checks that the string is not one of the disallowed values:
#[validate(required, not_in("forbidden", "banned"))]
tag: Option<String>,
The macro validates the type it is annotating; only structs with named fields are allowed:
use serde::Deserialize;
use csv_schema_validator::ValidateCsv;
#[derive(Deserialize, ValidateCsv)]
struct TupleStruct(f64, String);
#[derive(Deserialize, ValidateCsv)]
enum Status {
Success { code: f64, message: String },
Error(f64, String),
Unknown,
}
// This program is intentionally invalid: the `ValidateCsv` derive is rejected on
// both `TupleStruct` and `Status`, so the build stops with the errors quoted
// after this example.
fn main() {
let record = TupleStruct(42.0, "ABC1234".to_string());
let s = Status::Success { code: 200.0, message: "OK".into() };
// Tuple struct: the derive only supports structs with named fields.
let _ = record.validate_csv();
// Enum: the derive only supports structs.
let _ = s.validate_csv();
}
Trying to compile this code will result in errors:
cargo run
error: only structs with named fields (e.g., `struct S { a: T }`) are supported
--> src/main.rs:5:19
|
5 | struct TupleStruct(f64, String);
| ^^^^^^^^^^^^^
error: only structs are supported
--> src/main.rs:8:1
|
8 | / enum Status {
9 | | Success { code: f64, message: String },
10 | | Error(f64, String),
11 | | Unknown,
12 | | }
| |_^
This is an example which reads a csv file:
Cargo.toml:
[package]
name = "use-csv-validator"
version = "0.1.1"
edition = "2021"
[dependencies]
csv = "1.1"
serde = { version = "1.0", features = ["derive"] }
csv-schema-validator = "0.1.2"
src/main.rs:
use std::error::Error;
use csv::ReaderBuilder;
use serde::Deserialize;
use csv_schema_validator::{ValidateCsv, ValidationError};
/// Custom validator for the `comments` field.
///
/// Accepts `s` when its length (as reported by `str::len`, i.e. in bytes) is at
/// most 20; otherwise returns the error message `"Comments too long"`.
fn length_validation(s: &str) -> Result<(), String> {
    const MAX_LEN: usize = 20;

    // Guard clause: reject anything over the limit up front.
    if s.len() > MAX_LEN {
        return Err(String::from("Comments too long"));
    }
    Ok(())
}
// One CSV row. Each `#[validate(...)]` attribute declares the rules that the
// derived `validate_csv` method checks for the field it is attached to.
#[derive(Deserialize, ValidateCsv, Debug)]
struct TestRecord {
// Must lie between 0.0 and 100.0 (inclusive).
#[validate(range(min = 0.0, max = 100.0))]
grade: f64,
// Must match the pattern: three uppercase letters followed by four digits.
#[validate(regex = r"^[A-Z]{3}\d{4}$")]
code: String,
// Must be present, 10 to 50 long, and not blank.
#[validate(required, length(min = 10, max = 50), not_blank)]
name: Option<String>,
// Checked by the custom `length_validation` function.
#[validate(custom = "length_validation")]
comments: String,
// Read from the CSV column named `more`; must be present and equal to one of
// the listed values.
#[serde(rename = "more")]
#[validate(required, one_of("short", "medium", "long"))]
more_comments: Option<String>,
// Must be present and must not equal any of the listed values.
#[validate(required, not_in("forbidden", "banned"))]
tag: Option<String>,
}
/// Reads `data.csv`, then deserializes and validates every record in it.
///
/// Valid records are printed to stdout; validation failures are listed on
/// stderr, one line per failing field, and processing continues with the next
/// record. A record that cannot be deserialized aborts the run with that error.
fn main() -> Result<(), Box<dyn Error>> {
    // The CSV file sits alongside Cargo.toml; its first row is a header.
    let mut csv_reader = ReaderBuilder::new()
        .has_headers(true)
        .from_path("data.csv")?;

    // Number the records from 1 for the report.
    for (line_no, row) in (1..).zip(csv_reader.deserialize::<TestRecord>()) {
        let record = row?;

        // Report every failed rule for this record, then move on to the next one.
        if let Err(errors) = record.validate_csv() {
            eprintln!("Line {}: Validation errors:", line_no);
            for ValidationError { field, message } in errors {
                eprintln!(" Field `{}`: {}", field, message);
            }
            continue;
        }

        println!("Line {}: Record is valid: {:?}", line_no, record);
    }

    Ok(())
}
data.csv:
grade,code,name,comments,more,tag
85.5,XYZ1234,Alice Smith,All good,short,allowed
90.0,XYZ5678,Bob Marley,Too long comment indeed,medium,allowed
110.0,XYZ4567, ,ok,short,allowed
95.0,xWF9101,Charlie,code,long,allowed
110.0,XYZ2345,Dave Copperfield,range,short,allowed
34.0,XYZ6789,,name,medium,allowed
78.0,XYZ7890,Frank,more,invalid comment,allowed
88.0,XYZ4567,Grace,All good,short,
90.0,XYZ3567,Grace of All Times,All good,medium,forbidden
f34s,XYZ3456,Eve,comments,short,invalid grade
Running this example will generate these messages:
Line 1: Record is valid: TestRecord { grade: 85.5, code: "XYZ1234", name: Some("Alice Smith"), comments: "All good", more_comments: Some("short"), tag: Some("allowed") }
Line 2: Validation errors:
Field `comments`: Comments too long
Line 3: Validation errors:
Field `grade`: value out of expected range: 0 to 100
Field `name`: length out of expected range: 10 to 50
Field `name`: must not be blank or contain only whitespace
Line 4: Validation errors:
Field `code`: does not match the expected pattern
Field `name`: length out of expected range: 10 to 50
Line 5: Validation errors:
Field `grade`: value out of expected range: 0 to 100
Line 6: Validation errors:
Field `name`: mandatory field
Line 7: Validation errors:
Field `name`: length out of expected range: 10 to 50
Field `more_comments`: invalid value
Line 8: Validation errors:
Field `name`: length out of expected range: 10 to 50
Field `tag`: mandatory field
Line 9: Validation errors:
Field `tag`: value not allowed
Error: Error(Deserialize { pos: Some(Position { byte: 448, line: 11, record: 10 }), err: DeserializeError { field: Some(0), kind: ParseFloat(ParseFloatError { kind: Invalid }) } })
This library builds on the serde and csv crates.
While the csv crate provides low‑level parsing and some helper methods, this derive‑based approach offers:
- Validation rules declared directly on your serde::Deserialize types, so you get compile‑time guarantees on types and validations in one place.
- All validation errors for a record reported together as a Vec<ValidationError>, instead of ad‑hoc early exits.
By contrast, using the csv crate directly may require manual loops over records and explicit match/if chains for each validation, leading to more boilerplate and potential for missing checks.
#![no_std] environments).
- serde 1.0
- csv 1.3
- regex 1.11
Feel free to open issues and submit pull requests. See CONTRIBUTING.md for details.
This project is licensed under the MIT License. See the LICENSE file for details.