sattwik21/gestr-jepa-isl
Text Classification
•
88.4k
•
Updated
•
267
Yes, I am logged in.
Hi, I ran the example streaming dataset code you provided above, and it gives me the following error:
DataFilesNotFoundError Traceback (most recent call last)
Cell In[2], line 5
3 # Streams directly from the Hugging Face Hub, without downloading the dataset into disk or loading into memory
4 repo_id = "yaak-ai/L2D-v3"
----> 5 dataset = StreamingLeRobotDataset(repo_id)
File ~/sattwik/projects/robot-learning/act2/.venv/lib/python3.12/site-packages/lerobot/datasets/streaming_dataset.py:152, in StreamingLeRobotDataset.__init__(self, repo_id, root, episodes, image_transforms, delta_timestamps, tolerance_s, revision, force_cache_sync, streaming, buffer_size, max_num_shards, seed, rng, shuffle)
149 self.delta_timestamps = delta_timestamps
150 self.delta_indices = get_delta_indices(self.delta_timestamps, self.fps)
--> 152 self.hf_dataset: datasets.IterableDataset = load_dataset(
153 self.repo_id if not self.streaming_from_local else str(self.root),
154 split="train",
155 streaming=self.streaming,
156 data_files="data/*/*.parquet",
157 revision=self.revision,
158 )
160 self.num_shards = min(self.hf_dataset.num_shards, max_num_shards)
File ~/sattwik/projects/robot-learning/act2/.venv/lib/python3.12/site-packages/datasets/load.py:1392, in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, keep_in_memory, save_infos, revision, token, streaming, num_proc, storage_options, **config_kwargs)
1387 verification_mode = VerificationMode(
1388 (verification_mode or VerificationMode.BASIC_CHECKS) if not save_infos else VerificationMode.ALL_CHECKS
1389 )
1391 # Create a dataset builder
-> 1392 builder_instance = load_dataset_builder(
1393 path=path,
1394 name=name,
1395 data_dir=data_dir,
1396 data_files=data_files,
1397 cache_dir=cache_dir,
1398 features=features,
1399 download_config=download_config,
1400 download_mode=download_mode,
1401 revision=revision,
1402 token=token,
1403 storage_options=storage_options,
1404 **config_kwargs,
1405 )
1407 # Return iterable dataset in case of streaming
1408 if streaming:
File ~/sattwik/projects/robot-learning/act2/.venv/lib/python3.12/site-packages/datasets/load.py:1132, in load_dataset_builder(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, token, storage_options, **config_kwargs)
1130 if features is not None:
1131 features = _fix_for_backward_compatible_features(features)
-> 1132 dataset_module = dataset_module_factory(
1133 path,
1134 revision=revision,
1135 download_config=download_config,
1136 download_mode=download_mode,
1137 data_dir=data_dir,
1138 data_files=data_files,
1139 cache_dir=cache_dir,
1140 )
1141 # Get dataset builder class
1142 builder_kwargs = dataset_module.builder_kwargs
File ~/sattwik/projects/robot-learning/act2/.venv/lib/python3.12/site-packages/datasets/load.py:1025, in dataset_module_factory(path, revision, download_config, download_mode, data_dir, data_files, cache_dir, **download_kwargs)
1023 raise ConnectionError(f"Couldn't reach the Hugging Face Hub for dataset '{path}': {e1}") from None
1024 if isinstance(e1, (DataFilesNotFoundError, DatasetNotFoundError, EmptyDatasetError)):
-> 1025 raise e1 from None
1026 if isinstance(e1, FileNotFoundError):
1027 raise FileNotFoundError(
1028 f"Couldn't find any data file at {relative_to_absolute_path(path)}. "
1029 f"Couldn't find '{path}' on the Hugging Face Hub either: {type(e1).__name__}: {e1}"
1030 ) from None
File ~/sattwik/projects/robot-learning/act2/.venv/lib/python3.12/site-packages/datasets/load.py:1004, in dataset_module_factory(path, revision, download_config, download_mode, data_dir, data_files, cache_dir, **download_kwargs)
994 else:
995 use_exported_dataset_infos = True
996 return HubDatasetModuleFactory(
997 path,
998 commit_hash=commit_hash,
999 data_dir=data_dir,
1000 data_files=data_files,
1001 download_config=download_config,
1002 download_mode=download_mode,
1003 use_exported_dataset_infos=use_exported_dataset_infos,
-> 1004 ).get_module()
1005 except GatedRepoError as e:
1006 message = f"Dataset '{path}' is a gated dataset on the Hub."
File ~/sattwik/projects/robot-learning/act2/.venv/lib/python3.12/site-packages/datasets/load.py:638, in HubDatasetModuleFactory.get_module(self)
631 patterns = get_data_patterns(base_path, download_config=self.download_config)
632 data_files = DataFilesDict.from_patterns(
633 patterns,
634 base_path=base_path,
635 allowed_extensions=ALL_ALLOWED_EXTENSIONS,
636 download_config=self.download_config,
637 )
--> 638 module_name, default_builder_kwargs = infer_module_for_data_files(
639 data_files=data_files,
640 path=self.name,
641 download_config=self.download_config,
642 )
643 data_files = data_files.filter(
644 extensions=_MODULE_TO_EXTENSIONS[module_name], file_names=_MODULE_TO_METADATA_FILE_NAMES[module_name]
645 )
646 module_path, _ = _PACKAGED_DATASETS_MODULES[module_name]
File ~/sattwik/projects/robot-learning/act2/.venv/lib/python3.12/site-packages/datasets/load.py:300, in infer_module_for_data_files(data_files, path, download_config)
298 raise ValueError(f"Couldn't infer the same data file format for all splits. Got {split_modules}")
299 if not module_name:
--> 300 raise DataFilesNotFoundError("No (supported) data files found" + (f" in {path}" if path else ""))
301 return module_name, default_builder_kwargs
DataFilesNotFoundError: No (supported) data files found in yaak-ai/L2D-v3