1 change: 0 additions & 1 deletion .travis.yml
@@ -5,7 +5,6 @@ python:
install:
- pip install --quiet -r requirements.txt
- pip install pytest pytest-cov
- pip install -U scikit-learn
# command to run tests
script:
- pytest --cov=./
101 changes: 30 additions & 71 deletions README.md
@@ -30,77 +30,36 @@ Run the following commands to install fastNLP package.
pip install fastNLP
```

### Cloning From GitHub

If you just want to use fastNLP, clone the repository:
```shell
git clone https://github.com/fastnlp/fastNLP
cd fastNLP
```

### PyTorch Installation

Visit the [PyTorch official website](https://pytorch.org/) for installation instructions tailored to your system. In general, you can use:
```shell
# using conda
conda install pytorch torchvision -c pytorch
# or using pip
pip3 install torch torchvision
```

### TensorboardX Installation

```shell
pip3 install tensorboardX
```

## Project Structure

```
FastNLP
├── docs
├── fastNLP
│   ├── core
│   │   ├── action.py
│   │   ├── __init__.py
│   │   ├── loss.py
│   │   ├── metrics.py
│   │   ├── optimizer.py
│   │   ├── predictor.py
│   │   ├── preprocess.py
│   │   ├── README.md
│   │   ├── tester.py
│   │   └── trainer.py
│   ├── fastnlp.py
│   ├── __init__.py
│   ├── loader
│   │   ├── base_loader.py
│   │   ├── config_loader.py
│   │   ├── dataset_loader.py
│   │   ├── embed_loader.py
│   │   ├── __init__.py
│   │   └── model_loader.py
│   ├── models
│   ├── modules
│   │   ├── aggregation
│   │   ├── decoder
│   │   ├── encoder
│   │   ├── __init__.py
│   │   ├── interaction
│   │   ├── other_modules.py
│   │   └── utils.py
│   └── saver
├── LICENSE
├── README.md
├── reproduction
├── requirements.txt
├── setup.py
└── test
├── core
├── data_for_tests
├── __init__.py
├── loader
├── modules
└── readme_example.py

```
<table>
<tr>
<td><b> fastNLP </b></td>
<td> an open-source NLP library </td>
</tr>
<tr>
<td><b> fastNLP.core </b></td>
<td> trainer, tester, predictor </td>
</tr>
<tr>
<td><b> fastNLP.loader </b></td>
<td> all kinds of loaders/readers </td>
</tr>
<tr>
<td><b> fastNLP.models </b></td>
<td> a collection of NLP models </td>
</tr>
<tr>
<td><b> fastNLP.modules </b></td>
<td> a collection of PyTorch sub-models, components and reusable building blocks </td>
</tr>
<tr>
<td><b> fastNLP.saver </b></td>
<td> all kinds of savers/writers </td>
</tr>
<tr>
<td><b> fastNLP.fastnlp </b></td>
<td> a high-level interface for prediction </td>
</tr>
</table>
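
The sketch below shows how these packages are meant to fit together. It is assembled only from the import paths and calls that appear in `examples/readme_example.py` as changed in this pull request; everything not visible in the diff (model construction, the arguments to the preprocess, trainer and predictor classes) is left as comments rather than guessed:

```python
# Minimal workflow sketch. Import paths follow examples/readme_example.py.
from fastNLP.loader.dataset_loader import ClassDataSetLoader
from fastNLP.core.preprocess import ClassPreprocess
from fastNLP.core.trainer import ClassificationTrainer
from fastNLP.core.predictor import ClassificationInfer

# fastNLP.loader: read the raw text-classification data
ds_loader = ClassDataSetLoader()
data = ds_loader.load()

# fastNLP.core: pre-process the data, train a model, then run inference.
# The exact arguments for ClassPreprocess, ClassificationTrainer and
# ClassificationInfer are not shown in this diff; see
# examples/readme_example.py for the full, runnable script.
```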
4 changes: 2 additions & 2 deletions docs/source/user/quickstart.rst
@@ -18,7 +18,7 @@ pre-processing data, constructing model and training model.
from fastNLP.modules import aggregation
from fastNLP.modules import decoder

from fastNLP.loader.dataset_loader import ClassDatasetLoader
from fastNLP.loader.dataset_loader import ClassDataSetLoader
from fastNLP.loader.preprocess import ClassPreprocess
from fastNLP.core.trainer import ClassificationTrainer
from fastNLP.core.inference import ClassificationInfer
@@ -50,7 +50,7 @@ pre-processing data, constructing model and training model.
train_path = 'test/data_for_tests/text_classify.txt' # training set file

# load dataset
ds_loader = ClassDatasetLoader("train", train_path)
ds_loader = ClassDataSetLoader("train", train_path)
data = ds_loader.load()

# pre-process dataset
4 changes: 2 additions & 2 deletions examples/readme_example.py
@@ -3,7 +3,7 @@
from fastNLP.core.predictor import ClassificationInfer
from fastNLP.core.preprocess import ClassPreprocess
from fastNLP.core.trainer import ClassificationTrainer
from fastNLP.loader.dataset_loader import ClassDatasetLoader
from fastNLP.loader.dataset_loader import ClassDataSetLoader
from fastNLP.models.base_model import BaseModel
from fastNLP.modules import aggregator
from fastNLP.modules import decoder
@@ -36,7 +36,7 @@ def forward(self, x):
train_path = './data_for_tests/text_classify.txt' # training set file

# load dataset
ds_loader = ClassDatasetLoader(train_path)
ds_loader = ClassDataSetLoader()
data = ds_loader.load()

# pre-process dataset
21 changes: 4 additions & 17 deletions fastNLP/core/batch.py
@@ -17,7 +17,7 @@ def __init__(self, dataset, batch_size, sampler, use_cuda):
:param dataset: a DataSet object
:param batch_size: int, the size of the batch
:param sampler: a Sampler object
:param use_cuda: bool, whetjher to use GPU
:param use_cuda: bool, whether to use GPU

"""
self.dataset = dataset
@@ -37,15 +37,12 @@ def __next__(self):
"""

:return batch_x: dict of (str: torch.LongTensor), which means (field name: tensor of shape [batch_size, padding_length])
batch_x also contains an item (str: list of int) about origin lengths,
which means ("field_name_origin_len": origin lengths).
E.g.
::
{'text': tensor([[ 0, 1, 2, 3, 0, 0, 0], [ 4, 5, 2, 6, 7, 8, 9]]), 'text_origin_len': [4, 7]}

batch_y: dict of (str: torch.LongTensor), which means (field name: tensor of shape [batch_size, padding_length])
All tensors in both batch_x and batch_y will be cuda tensors if use_cuda is True.
The names of fields are defined in the preprocessor's convert_to_dataset method.

"""
if self.curidx >= len(self.idx_list):
@@ -54,34 +51,24 @@
endidx = min(self.curidx + self.batch_size, len(self.idx_list))
padding_length = {field_name: max(field_length[self.curidx: endidx])
for field_name, field_length in self.lengths.items()}
origin_lengths = {field_name: field_length[self.curidx: endidx]
for field_name, field_length in self.lengths.items()}

batch_x, batch_y = defaultdict(list), defaultdict(list)

# transform index to tensor and do padding for sequences
for idx in range(self.curidx, endidx):
x, y = self.dataset.to_tensor(idx, padding_length)
for name, tensor in x.items():
batch_x[name].append(tensor)
for name, tensor in y.items():
batch_y[name].append(tensor)

batch_origin_length = {}
# combine instances into a batch
# combine instances to form a batch
for batch in (batch_x, batch_y):
for name, tensor_list in batch.items():
if self.use_cuda:
batch[name] = torch.stack(tensor_list, dim=0).cuda()
else:
batch[name] = torch.stack(tensor_list, dim=0)

# add origin lengths in batch_x
for name, tensor in batch_x.items():
if self.use_cuda:
batch_origin_length[name + "_origin_len"] = torch.LongTensor(origin_lengths[name]).cuda()
else:
batch_origin_length[name + "_origin_len"] = torch.LongTensor(origin_lengths[name])
batch_x.update(batch_origin_length)

self.curidx = endidx
return batch_x, batch_y
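
For context, a minimal sketch of how this iterator is consumed, assuming `Batch` also defines `__iter__` (only `__init__` and `__next__` appear in this hunk) and using placeholder `data_set` and `sampler` arguments:

```python
from fastNLP.core.batch import Batch

def iterate_batches(data_set, sampler, batch_size=16, use_cuda=False):
    """Placeholder driver: `data_set` is a preprocessed DataSet object and
    `sampler` a Sampler object, matching the parameters documented in
    Batch.__init__ above."""
    batch_iter = Batch(dataset=data_set, batch_size=batch_size,
                       sampler=sampler, use_cuda=use_cuda)
    for batch_x, batch_y in batch_iter:
        # Each of batch_x / batch_y maps a field name to a padded LongTensor
        # of shape [batch_size, padding_length]; after this change, batch_x
        # no longer carries the extra "<field>_origin_len" entries.
        pass
```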
